##### Libraries for sptial information in map, map projection and map ploting
library(maps)
#Import California map with counties
ca.county = map("county","california", fill=TRUE, plot=FALSE)
library(mapproj)
library(spdep)
library(maptools)
library(classInt)
library(RColorBrewer)
library(ggpubr)
##### Libraries for matrix, distribution and math computation
library(mvtnorm)
library(matrixStats)
library(blockmatrix)
library(Matrix)
library(invgamma)
library(mnormt)
library(MASS)
library(Hmisc) #rcorr
library(gtools) #permutation
#library(fields)
#library(msos)
#library(boot)
#library(ggmcmc)
#library(mcmc)
#library(magic)
#library(AICcmodavg)
#library(coda)
##### Libraries for JAGS
library(rjags)
library(R2jags)
##### Draw plots
library(ggplot2)
##### Data management
library(tidyr)
##### Import and export documents
library(knitr)
library(readr)
##### Bayesian inference
library(LaplacesDemon)
##### Model selection
library(bridgesampling)
Cancers: lung, esophageal, larynx and colorectal
Outcome: Standard incidence ratios (SMR\(_{id} = Y_{id}/E_{id}\)) in the years from 2012 to 2016.
\(Y_{id}\) - observed counts of incidence for each cancer \(d\) in each county \(i\)
\(E_{id}\) - expected number of cases for each cancer \(d\) in each county \(i\)
Covariates (percentage): adult cigarette smoking rates, percentages of residents younger than 18 years old, older than 65 years old, with education level below high school, percentages of unemployed residents, black residents, male residents, uninsured residents, and percentages of families below the poverty threshold
#Import covariates
covariates <- read.csv("data/covariates.csv")
race <- read.csv("data/race.csv")
sex <- read.csv("data/sex.csv")
insurance <- read.csv("data/insurance.csv")
smoking <- read.csv("data/smoking.csv")
smoking$smoking <- as.numeric(substr(smoking$Cigarette.Smoking.Rate., 1,4))
#Import age-adjusted incidence rates for 4 cancers in California
rate_5y <- read.csv("data/age_adjusted.csv")
rate_CA = rate_5y[substr(rate_5y$State_county,1,2) == "CA",]
rate_lung = rate_CA[rate_CA$Site_recode_ICD_O_3_WHO_2008=="Lung and Bronchus",]
rate_lung = rate_lung[order(readr::parse_number(as.character(rate_lung$State_county))),]
rate_lung$E_count = (sum(rate_lung$Count) / sum(rate_lung$Population)) * rate_lung$Population
rate_lung$standard_ratio = rate_lung$Count / rate_lung$E_count
rate_esophagus = rate_CA[rate_CA$Site_recode_ICD_O_3_WHO_2008=="Esophagus",]
rate_esophagus = rate_esophagus[order(readr::parse_number(as.character(rate_esophagus$State_county))),]
rate_esophagus$E_count = (sum(rate_esophagus$Count) / sum(rate_esophagus$Population)) * rate_esophagus$Population
rate_esophagus$standard_ratio = rate_esophagus$Count / rate_esophagus$E_count
rate_larynx = rate_CA[rate_CA$Site_recode_ICD_O_3_WHO_2008=="Larynx",]
rate_larynx = rate_larynx[order(readr::parse_number(as.character(rate_larynx$State_county))),]
rate_larynx$E_count = (sum(rate_larynx$Count) / sum(rate_larynx$Population)) * rate_larynx$Population
rate_larynx$standard_ratio = rate_larynx$Count / rate_larynx$E_count
rate_colrect = rate_CA[rate_CA$Site_recode_ICD_O_3_WHO_2008=="Colon and Rectum",]
rate_colrect = rate_colrect[order(readr::parse_number(as.character(rate_colrect$State_county))),]
rate_colrect$E_count = (sum(rate_colrect$Count) / sum(rate_colrect$Population)) * rate_colrect$Population
rate_colrect$standard_ratio = rate_colrect$Count / rate_colrect$E_count
################## Data construction ###########
## Data
# Covariates in percentage
county_attribute = covariates[substr(covariates$State_county,1,2) == "CA",]
county_attribute$state = extract_numeric(county_attribute$State_county)
county_attribute1 = data.frame(county_attribute[order(county_attribute$state),])
county_attribute1$V_Persons_age_18_ACS_2012_2016 = as.numeric(county_attribute1$V_Persons_age_18_ACS_2012_2016)/100
county_attribute1$V_Persons_age_65_ACS_2012_2016 = as.numeric(county_attribute1$V_Persons_age_65_ACS_2012_2016)/100
county_attribute1$VHighschooleducationACS2012201 = as.numeric(county_attribute1$VHighschooleducationACS2012201)/100
county_attribute1$VFamiliesbelowpovertyACS201220 = as.numeric(county_attribute1$VFamiliesbelowpovertyACS201220)/100
county_attribute1$V_Unemployed_ACS_2012_2016 = as.numeric(county_attribute1$V_Unemployed_ACS_2012_2016)/100
race1 = race[substr(race$State_county,1,2) == "CA"&race$Race_recode_White_Black_Other=="Black",]
sex1 = sex[substr(sex$State_county,1,2) == "CA"&sex$Sex=="Male",]
insurance1 = insurance[substr(insurance$State_county,1,2) == "CA"&insurance$Insurance_Recode_2007=="Uninsured",]
rate_lung1 = cbind(rate_lung, smoking$smoking, county_attribute1[,2:6], race1$Row_Percent, sex1$Row_Percent,insurance1$Row_Percent)
colnames(rate_lung1) = c("county", "site", "rate", "count", "population", "E_count", "standard_ratio", "smoking", "young","old", "highschool", "poverty", "unemployed", "black", "male", "uninsured")
rate_esophagus1 = cbind(rate_esophagus, smoking$smoking, county_attribute1[,2:6], race1$Row_Percent, sex1$Row_Percent,insurance1$Row_Percent)
colnames(rate_esophagus1) = c("county", "site", "rate", "count", "population", "E_count", "standard_ratio", "smoking", "young","old", "highschool", "poverty", "unemployed", "black", "male", "uninsured")
rate_larynx1 = cbind(rate_larynx, smoking$smoking, county_attribute1[,2:6], race1$Row_Percent, sex1$Row_Percent,insurance1$Row_Percent)
colnames(rate_larynx1) = c("county", "site", "rate", "count", "population", "E_count", "standard_ratio", "smoking", "young","old", "highschool", "poverty", "unemployed", "black", "male", "uninsured")
rate_colrect1 = cbind(rate_colrect, smoking$smoking, county_attribute1[,2:6], race1$Row_Percent, sex1$Row_Percent,insurance1$Row_Percent)
colnames(rate_colrect1) = c("county", "site", "rate", "count", "population", "E_count", "standard_ratio", "smoking", "young","old", "highschool", "poverty", "unemployed", "black", "male", "uninsured")
To plot the cancer data, we need to combine the data with the map polygon
#County information
county.ID <- sapply(strsplit(ca.county$names, ","), function(x) x[2])
ca.poly = map2SpatialPolygons(ca.county, IDs=county.ID)
ca.poly$rate_lung = rate_lung$standard_ratio
ca.poly$rate_esophagus = rate_esophagus$standard_ratio
ca.poly$rate_larynx = rate_larynx$standard_ratio
ca.poly$rate_colrect = rate_colrect$standard_ratio
ca.poly$smoking = smoking$smoking
ca.coords = coordinates(ca.poly)
##################### Plot SIR in areal map ##################
brks_fit_lung = quantile(ca.poly$rate_lung, c(0, 0.2, 0.4, 0.6, 0.8, 1))
brks_fit_esophagus = quantile(ca.poly$rate_esophagus, c(0, 0.2, 0.4, 0.6, 0.8, 1))
brks_fit_larynx = quantile(ca.poly$rate_larynx, c(0, 0.2, 0.4, 0.6, 0.8, 1))
brks_fit_colrect = quantile(ca.poly$rate_colrect, c(0, 0.2, 0.4, 0.6, 0.8, 1))
color.pallete = rev(brewer.pal(5,"RdBu"))
class.rate_lung = classIntervals(var=ca.poly$rate_lung, n=5, style="fixed",
fixedBreaks=brks_fit_lung, dataPrecision=4)
class.rate_esophagus = classIntervals(var=ca.poly$rate_esophagus, n=5, style="fixed",
fixedBreaks=brks_fit_esophagus, dataPrecision=4)
class.rate_larynx = classIntervals(var=ca.poly$rate_larynx, n=5, style="fixed",
fixedBreaks=brks_fit_larynx, dataPrecision=4)
class.rate_colrect = classIntervals(var=ca.poly$rate_colrect, n=5, style="fixed",
fixedBreaks=brks_fit_colrect, dataPrecision=4)
color.code.rate_lung = findColours(class.rate_lung, color.pallete)
color.code.rate_esophagus = findColours(class.rate_esophagus, color.pallete)
color.code.rate_larynx = findColours(class.rate_larynx, color.pallete)
color.code.rate_colrect = findColours(class.rate_colrect, color.pallete)
par(mfrow=c(2,2), oma = c(0,0,4,0) + 0.1, mar = c(0,0,2,0) + 0.1)
plot(ca.poly, col = color.code.rate_lung, main = "Lung cancer")
plot(ca.poly, col = color.code.rate_esophagus, main = "Esophageal cancer")
plot(ca.poly, col = color.code.rate_larynx, main = "larynx cancer")
plot(ca.poly, col = color.code.rate_colrect, main = "colorectal cancer")
leg.txt = c("0-20%", "20%-40%", "40%-60%", "60%-80%", "80%-100%")
mtext("Figure 1. Maps of standardized incidence ratios (SIR) for lung, esophageal, larynx and colocrectal cancer in California, 2012 - 2016", outer = TRUE, cex = 1)
par(fig = c(0, 1, 0, 1), oma = c(0, 0, 0, 0), mar = c(0, 0, 0, 0), new = TRUE)
plot(0, 0, type = "n", bty = "n", xaxt = "n", yaxt = "n")
legend("center", legend=leg.txt, xpd = TRUE, cex=1.5, bty="n", horiz = FALSE,
fill = color.pallete)
################## Plot smoking rates in areal map ##################
brks_fit_smoking = quantile(ca.poly$smoking, c(0, 0.2, 0.4, 0.6, 0.8, 1))
class.rate_smoking = classIntervals(var=ca.poly$smoking, n=7, style="fixed",
fixedBreaks=brks_fit_smoking, dataPrecision=4)
color.code.rate_smoking = findColours(class.rate_smoking, color.pallete)
par(oma = c(0,0,4,0) + 0.1, mar = c(0,0,2,0) + 0.1)
plot(ca.poly, col = color.code.rate_smoking, main = "Figure 2. Map of adult cigarette smoking rates in California, 2014 - 2016", size = 1)
leg.txt = c("0-20%", "20%-40%", "40%-60%", "60%-80%", "80%-100%")
legend("bottomleft", legend=leg.txt, xpd = TRUE, cex=1, bty="n", horiz = FALSE,
fill = color.pallete)
The matrix ``Adj’’ is the adjacency matrix for counties in California. We reorder the counties starting from northwest to southeast for directed graph. Then we put all the islands (counties with no parent neighbors) at the beginning. There are \(58\) counties in California.
ca.neighbors = poly2nb(ca.poly)
n=length(ca.neighbors)
# original adjacency matrix
Adj=sapply(ca.neighbors,function(x,n) {v=rep(0,n);v[x]=1;v},n)
colnames(Adj)=county.ID
ca.coord = coordinates(ca.poly)
ca.latrange=round(quantile(ca.coord[,2],c(0.25,0.75)))
ca.albersproj=mapproject(ca.coord[,1],ca.coord[,2],projection = "albers",param=ca.latrange)
# reorder counties from northwest to southeast
perm=order(ca.albersproj$x-ca.albersproj$y)
Adj_new=Adj[perm,perm]
n=nrow(Adj_new)
ni=rowSums(Adj_new)
maxn=max(ni)
neimat=matrix(0,n,maxn)
neighbors=lapply(1:n,function(x) which(Adj_new[x,]==1))
dneighbors=sapply(2:n,function(i) intersect(neighbors[[i]],1:(i-1)))
dni=sapply(dneighbors,length)
original_perm = 1:58
index2=c(1,which(dni==0)+1)
# final permutation for counties: put all the islands (counties with no parent neighbors) at the beginning
final_perm=c(original_perm[perm][index2],
original_perm[perm][-index2])
colnames(Adj)[final_perm]
## [1] "del norte" "san francisco" "humboldt" "siskiyou"
## [5] "trinity" "shasta" "modoc" "mendocino"
## [9] "tehama" "glenn" "lake" "lassen"
## [13] "colusa" "butte" "sonoma" "plumas"
## [17] "napa" "sutter" "yuba" "marin"
## [21] "yolo" "sierra" "nevada" "solano"
## [25] "placer" "sacramento" "contra costa" "san mateo"
## [29] "el dorado" "alameda" "amador" "san joaquin"
## [33] "santa cruz" "calaveras" "santa clara" "alpine"
## [37] "stanislaus" "tuolumne" "merced" "mariposa"
## [41] "san benito" "monterey" "mono" "madera"
## [45] "fresno" "kings" "san luis obispo" "tulare"
## [49] "santa barbara" "inyo" "kern" "ventura"
## [53] "los angeles" "orange" "san bernardino" "riverside"
## [57] "san diego" "imperial"
Minc = Adj[final_perm,final_perm]
n=nrow(Minc)
ni=rowSums(Minc)
maxn=max(ni)
neimat=matrix(0,n,maxn)
neighbors=lapply(1:n,function(x) which(Minc[x,]==1))
#directed or parent neighbors
dneighbors=sapply(2:n,function(i) intersect(neighbors[[i]],1:(i-1)))
dni=sapply(dneighbors,length)
nmax=max(dni)
cni=cumsum(dni)
dneimat=sapply(dneighbors, function(nei,nmax,n) c(nei,rep(n+1,nmax+1-length(nei))),nmax,n)
udnei=unlist(dneighbors)
ni_wo = sapply(neighbors,length)
cni_wo = cumsum(ni_wo)
udnei_wo = unlist(neighbors)
cn = c(0, cni)
ns = dni
region = seq(1:n)
cn = c(0, cni)
ns = dni
index = list()
for(i in 1:(n-2)){
index[[i]] = region[-(udnei[(cn[i+1] + 1):(cn[i+1] + ns[i+1])])]
}
index1 = unlist(index)
Here are a total of 139 pairs of neighboring counties in California.
# Neighboring counties information
neighborvec0 = NULL
neighbor_list0 = NULL
neighbor_name = NULL
for(i in 1:(n-1)){
for(j in (i+1):n){
if(Adj[i,j] == 1){
neighborvec0 = c(neighborvec0, paste(i, ",", j, sep=""))
neighbor_list0 = rbind(neighbor_list0, c(i,j))
neighbor_name = c(neighbor_name, paste(colnames(Adj)[i], ", ", colnames(Adj)[j], sep=""))
}
}
}
neighbor_name
## [1] "alameda, contra costa" "alameda, san joaquin"
## [3] "alameda, santa clara" "alameda, stanislaus"
## [5] "alpine, amador" "alpine, calaveras"
## [7] "alpine, el dorado" "alpine, mono"
## [9] "alpine, tuolumne" "amador, calaveras"
## [11] "amador, el dorado" "amador, sacramento"
## [13] "amador, san joaquin" "butte, colusa"
## [15] "butte, glenn" "butte, plumas"
## [17] "butte, sutter" "butte, tehama"
## [19] "butte, yuba" "calaveras, san joaquin"
## [21] "calaveras, stanislaus" "calaveras, tuolumne"
## [23] "colusa, glenn" "colusa, lake"
## [25] "colusa, sutter" "colusa, yolo"
## [27] "contra costa, sacramento" "contra costa, san joaquin"
## [29] "contra costa, solano" "del norte, humboldt"
## [31] "del norte, siskiyou" "el dorado, placer"
## [33] "el dorado, sacramento" "fresno, inyo"
## [35] "fresno, kings" "fresno, madera"
## [37] "fresno, merced" "fresno, mono"
## [39] "fresno, monterey" "fresno, san benito"
## [41] "fresno, tulare" "glenn, lake"
## [43] "glenn, mendocino" "glenn, tehama"
## [45] "humboldt, mendocino" "humboldt, siskiyou"
## [47] "humboldt, trinity" "imperial, riverside"
## [49] "imperial, san diego" "inyo, kern"
## [51] "inyo, mono" "inyo, san bernardino"
## [53] "inyo, tulare" "kern, kings"
## [55] "kern, los angeles" "kern, monterey"
## [57] "kern, san bernardino" "kern, san luis obispo"
## [59] "kern, santa barbara" "kern, tulare"
## [61] "kern, ventura" "kings, monterey"
## [63] "kings, san luis obispo" "kings, tulare"
## [65] "lake, mendocino" "lake, napa"
## [67] "lake, sonoma" "lake, yolo"
## [69] "lassen, modoc" "lassen, plumas"
## [71] "lassen, shasta" "lassen, sierra"
## [73] "los angeles, orange" "los angeles, san bernardino"
## [75] "los angeles, ventura" "madera, mariposa"
## [77] "madera, merced" "madera, mono"
## [79] "madera, tuolumne" "marin, sonoma"
## [81] "mariposa, merced" "mariposa, stanislaus"
## [83] "mariposa, tuolumne" "mendocino, sonoma"
## [85] "mendocino, tehama" "mendocino, trinity"
## [87] "merced, san benito" "merced, santa clara"
## [89] "merced, stanislaus" "merced, tuolumne"
## [91] "modoc, shasta" "modoc, siskiyou"
## [93] "mono, tuolumne" "monterey, san benito"
## [95] "monterey, san luis obispo" "monterey, santa cruz"
## [97] "napa, solano" "napa, sonoma"
## [99] "napa, yolo" "nevada, placer"
## [101] "nevada, sierra" "nevada, yuba"
## [103] "orange, riverside" "orange, san bernardino"
## [105] "orange, san diego" "placer, sacramento"
## [107] "placer, sutter" "placer, yuba"
## [109] "plumas, shasta" "plumas, sierra"
## [111] "plumas, tehama" "plumas, yuba"
## [113] "riverside, san bernardino" "riverside, san diego"
## [115] "sacramento, san joaquin" "sacramento, solano"
## [117] "sacramento, sutter" "sacramento, yolo"
## [119] "san benito, santa clara" "san benito, santa cruz"
## [121] "san francisco, san mateo" "san joaquin, santa clara"
## [123] "san joaquin, stanislaus" "san luis obispo, santa barbara"
## [125] "san mateo, santa clara" "san mateo, santa cruz"
## [127] "santa barbara, ventura" "santa clara, santa cruz"
## [129] "santa clara, stanislaus" "shasta, siskiyou"
## [131] "shasta, tehama" "shasta, trinity"
## [133] "sierra, yuba" "siskiyou, trinity"
## [135] "solano, yolo" "stanislaus, tuolumne"
## [137] "sutter, yolo" "sutter, yuba"
## [139] "tehama, trinity"
To explore the spatial association for each disease, we calculate Moran’s I based upon rth order neighbors for each cancer and plot the areal correlogram.
# distance matrix
projmat=cbind(ca.albersproj$x,ca.albersproj$y)
dmat=as.matrix(dist(projmat))
# Moran's I function
moranI <- function(y, A){
n = length(y)
nom_sum = 0
den_sum = 0
for(i in 1:n){
den_sum = den_sum + (y[i]-mean(y))^2
for(j in 1:n){
nom_sum = nom_sum + A[i,j]*(y[i]-mean(y))*(y[j]-mean(y))
}
}
return(n*nom_sum/sum(A)/den_sum)
}
lung_moran = c()
esophagus_moran = c()
larynx_moran = c()
colrect_moran = c()
for(lag in 1:11){
A_1 <- as.matrix((dmat <= 0.01*lag & dmat > 0.01*(lag-1))*1)
diag(A_1) = 0
lung_moran[lag] <- as.numeric(moranI(rate_lung$standard_ratio, A_1))
esophagus_moran[lag] <- as.numeric(moranI(rate_esophagus$standard_ratio, A_1))
larynx_moran[lag] <- as.numeric(moranI(rate_larynx$standard_ratio, A_1))
colrect_moran[lag] <- as.numeric(moranI(rate_colrect$standard_ratio, A_1))
}
moran_value = c(lung_moran, esophagus_moran, larynx_moran, colrect_moran)
cancer = c(rep("Lung", 11), rep("Esophageal", 11), rep("Larynx", 11), rep("Colorectum", 11))
df = data.frame(cancer)
df$moran_value = moran_value
df$r = rep(1:11, 4)
df$cancer = factor(df$cancer, levels = c("Lung", "Esophageal", "Larynx", "Colorectum"))
ggplot(df, aes(r, moran_value)) + geom_point() +
ylab("Moran's I") + facet_wrap(~cancer) + theme_bw() +
scale_x_continuous(breaks = 1:11) +
ggtitle("Figure 3. Moran’s I of rth order neighbors for lung, esophageal, larynx and colorectum cancer") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
plot.title = element_text(size=11))
cancer_ratio = cbind(rate_lung$standard_ratio, rate_esophagus$standard_ratio, rate_larynx$standard_ratio, rate_colrect$standard_ratio)
colnames(cancer_ratio) = c("Lung and Bronchus", "Esophageal", "Larynx", "Colon and Rectum")
cor_matrix <- rcorr(cancer_ratio)
cor_matrix
## Lung and Bronchus Esophageal Larynx Colon and Rectum
## Lung and Bronchus 1.00 0.80 0.59 0.88
## Esophageal 0.80 1.00 0.58 0.66
## Larynx 0.59 0.58 1.00 0.50
## Colon and Rectum 0.88 0.66 0.50 1.00
##
## n= 58
##
##
## P
## Lung and Bronchus Esophageal Larynx Colon and Rectum
## Lung and Bronchus 0 0 0
## Esophageal 0 0 0
## Larynx 0 0 0
## Colon and Rectum 0 0 0
We apply two joint models (MDAGAR and MCAR) and two independent-disease models (DAGAR\(_{ind}\) and CAR\(_{ind}\)) for the multivariate difference boundary detection. We only include intercept for fixed effects and exclude covariates.
The two joint models are implemented using Metropolis within Gibbs Sampler in R, and two independent-disease models are implemented using rjags. For MDAGAR models, we detect difference boundaries for each cancer individually, shared difference boundaries, and cross-cancer boundaries. For MCAR model, we only show difference boundary detection for each cancer individually. For DAGAR\(_{ind}\) and CAR\(_{ind}\) models, the difference boundaries for each cancer individually and shared difference boundaries are detected to compare with joint models. We also calculate D scores to compare the model fitting for four models.
For each model, it takes up to 24 hrs to run. To save time, we include the code for DAGAR\(_{ind}\) and CAR\(_{ind}\) without running for result. You can change the setting by deleting “eval = FALSE” in corresponding code chunk.
We run models for incidence data without covariates (only include intercept)
Y1 = rate_lung1$count[final_perm]
Y2 = rate_esophagus1$count[final_perm]
Y3 = rate_larynx1$count[final_perm]
Y4 = rate_colrect1$count[final_perm]
E1 = rate_lung1$E_count[final_perm]
E2 = rate_esophagus1$E_count[final_perm]
E3 = rate_larynx1$E_count[final_perm]
E4 = rate_colrect1$E_count[final_perm]
X1 = as.matrix(cbind(1,rate_lung1[,8:14]))[final_perm,]
X2 = as.matrix(cbind(1,rate_esophagus1[,8:14]))[final_perm,]
X3 = as.matrix(cbind(1,rate_larynx1[,8:14]))[final_perm,]
X4 = as.matrix(cbind(1,rate_colrect1[,8:14]))[final_perm,]
Y = c(Y1,Y2,Y3,Y4)
E = c(E1, E2, E3, E4)
X = as.matrix(bdiag(bdiag(X1[,1], X2[,1]), bdiag(X3[,1],X4[,1])))
Functions for Joint models
# compute stick-breaking weights
makeprobs<-function(v){
m<-length(v)
p <-matrix(0,m,m)
for(j in 2:m){
p[1:(j-1),j]<-1
}
probs<-exp(log(v)+log(1-v)%*%p)
probs[m]<-1-sum(probs[1:(m-1)])
probs
}
# compute u weights from F_r and prob
makeu<- function(F_r,probs){
m1=length(F_r)
m2=length(probs)
u=rep(0,m1)
for (k in 1:m1){
for (l in 1:m2){
if (sum(probs[1:(l-1)])<F_r[k] && F_r[k]<sum(probs[1:l])){
u[k]=l
}
if (u[k]==0){
u[k]=1
}
}
}
return(u)
}
# Jacobian matrix for updating A
Jacob_A <- function(A){
prod = 1
for(i in 1:nrow(A)){
prod = prod*A[i,i]^(nrow(A)-i+1)
}
return(2^nrow(A)*prod)
}
# Compute presicion matrices of DAGAR
Dinv_new <- function(Rho, n, cn, ns, udnei,q){
Tau <- list()
B <- list()
invD <- list()
for(i in 1:q){
Tau[[i]] <- diag(n)
B[[i]] <- matrix(0, n, n)
for(j in 3:n){
Tau[[i]][j,j] <- (1 + (ns[j-1] - 1) * Rho[i]^2) / (1 - Rho[i]^2)
for(k in (udnei[(cn[j-1] + 1):(cn[j-1] + ns[j-1])])){
B[[i]][j,k] <- Rho[i] / (1 + (ns[j-1] - 1) * Rho[i]^2)
}
}
invD[[i]] <- t(diag(n) - B[[i]]) %*% Tau[[i]] %*% (diag(n) - B[[i]])
}
return(invD)
}
#######################################
### MDAGAR model with no covariates ###
#######################################
# Metropolis within Gibbs Sampler for MCMC updating
ARDP_joint_diff<-function(y=Y, x1=as.matrix(X1[,1]), x2=as.matrix(X2[,1]), x3 = as.matrix(X3[,1]), x4 = as.matrix(X4[,1]),
X = X, E = E, Minc, alpha=1, q = 4, n.atoms=15, runs=10000, burn=1000){
#y: data
#x: covariates
#n.atoms: number of atoms in the mixture dist.
#theta: the theta's (iid from baseline) in the model
#alpha: v~beta(1,alpha)
#u: the index indicator of spatial random effect
#rho: sptatial autocorrelation parameter in DAGAR
#Minc: 0-1 adjacency matrix
#V_r: covariance matrix of joint MDAGAR
#Q: presicion matrix of DAGAR
#r: random effects following DAGAR
#F_r: Marginal CDF of r
#taued: presicion in Baseline of DP for disease d
#taus: precision for theta
nq<-length(y)
n <- nq / q
p<-ncol(X)
y1 <- y[1:n]
y2 <- y[(n+1):(2*n)]
y3 <- y[(2*n+1):(3*n)]
y4 <- y[(3*n+1):(4*n)]
E1 <- E[1:n]
E2 <- E[(n+1):(2*n)]
E3 <- E[(2*n+1):(3*n)]
E4 <- E[(3*n+1):(4*n)]
sigmasq_beta = 10000
keepbeta=matrix(0,runs,p)
keepphi=matrix(0,runs,nq)
keeptheta=matrix(0,runs,n.atoms)
keepA=matrix(0,runs,10)
keepu=matrix(0,runs,nq)
keeprho1=keeprho2=keeprho3=keeprho4=keeptaus = rep(0,runs)
keepv=matrix(0,runs,n.atoms)
keepr=matrix(0,runs,nq)
keepFr=matrix(0,runs,nq)
#initial values
theta=rep(0,n.atoms)
beta1<-rep(0,ncol(x1))
beta2<-rep(0,ncol(x2))
beta3<-rep(0,ncol(x3))
beta4<-rep(0,ncol(x4))
beta = c(beta1, beta2, beta3, beta4)
taus = 1
c=2
d=0.1
d2=1
v<-rep(.1,n.atoms)
probs=makeprobs(v)
#A matrix
rho=c(0.5, 0.5, 0.5, 0.5)
eta = log(rho/(1-rho))
A = matrix(0, q, q)
for(i in 1:q){
for(j in 1:i){
if(j == i){
A[i, j] = exp(rnorm(1))
}else{
A[i, j] = rnorm(1)
}
}
}
Q = Dinv_new(Rho = rho, n, cn, ns, udnei, q)
invQ1 = solve(Q[[1]])
invQ2 = solve(Q[[2]])
invQ3 = solve(Q[[3]])
invQ4 = solve(Q[[4]])
kprod = kronecker(A, diag(n))
invQ = as.matrix(bdiag(bdiag(invQ1, invQ2), bdiag(invQ3, invQ4)))
Vr = as.matrix(forceSymmetric(kprod %*% invQ %*% t(kprod)))
r = rmvnorm(1, rep(0, nq), Vr)
F_r=pnorm(r,0,sqrt(diag(Vr)))
u=makeu(F_r,probs)
phi <- theta[u]
nu = 2
R = 0.1 * diag(q)
acceptr=acceptv=acceptrho=acceptA=0
acceptbeta1 = acceptbeta2=acceptbeta3=acceptbeta4 = 0
accepttheta = 0
count<-afterburn<-0
burn = burn + 1
for(iter in 1:runs){
if(iter == runs){
print(iter)
print(acceptA/(iter-1))
print(acceptr/nq/(iter-1))
print(acceptrho/(iter-1))
print(acceptv/n.atoms/(iter-1))
print(accepttheta/n.atoms/(iter-1))
print(acceptbeta1/(iter-1))
print(acceptbeta2/(iter-1))
print(acceptbeta3/(iter-1))
print(acceptbeta4/(iter-1))
}
######################
### update beta ###
######################
# update beta (intercept only model)
pro_beta1=rnorm(1,beta1,0.02)
MHrate1=sum(-E1*exp(pro_beta1*x1+theta[u][1:n])+y1*(pro_beta1*x1+theta[u][1:n]))-
sum(-E1*exp(beta1*x1+theta[u][1:n])+y1*(beta1*x1+theta[u][1:n]))
if(runif(1,0,1)<exp(MHrate1)){
beta1<-pro_beta1
acceptbeta1=acceptbeta1+1
}
pro_beta2=rnorm(1,beta2,0.05)
MHrate2=sum(-E2*exp(pro_beta2*x2+theta[u][(n+1):(2*n)])+y2*(pro_beta2*x2+theta[u][(n+1):(2*n)]))-
sum(-E2*exp(beta2*x2+theta[u][(n+1):(2*n)])+y2*(beta2*x2+theta[u][(n+1):(2*n)]))
if(runif(1,0,1)<exp(MHrate2)){
beta2<-pro_beta2
acceptbeta2=acceptbeta2+1
}
pro_beta3=rnorm(1,beta3,0.05)
MHrate3=sum(-E3*exp(pro_beta3*x3+theta[u][(2*n+1):(3*n)])+y3*(pro_beta3*x3+theta[u][(2*n+1):(3*n)]))-
sum(-E3*exp(beta3*x3+theta[u][(2*n+1):(3*n)])+y3*(beta3*x3+theta[u][(2*n+1):(3*n)]))
if(runif(1,0,1)<exp(MHrate3)){
beta3<-pro_beta3
acceptbeta3=acceptbeta3+1
}
pro_beta4=rnorm(1,beta4,0.02)
MHrate4=sum(-E4*exp(pro_beta4*x4+theta[u][(3*n+1):(4*n)])+y4*(pro_beta4*x4+theta[u][(3*n+1):(4*n)]))-
sum(-E4*exp(beta4*x4+theta[u][(3*n+1):(4*n)])+y4*(beta4*x4+theta[u][(3*n+1):(4*n)]))
if(runif(1,0,1)<exp(MHrate4)){
beta4<-pro_beta4
acceptbeta4=acceptbeta4+1
}
beta <- c(beta1, beta2, beta3, beta4)
#########################
### update theta ###
#########################
u1 = u[1:n]
u2 = u[(n+1):(2*n)]
u3 = u[(2*n+1):(3*n)]
u4 = u[(3*n+1):(4*n)]
for (j in 1:n.atoms){
count[j]=sum(u==j)
if (count[j]==0){
theta[j]=rnorm(1,0,sqrt(1/taus))
}else{
pro_theta=rnorm(1,theta[j],0.06)
MHrate<-sum(-(E[u==j])*exp(X[u==j,] %*% beta+pro_theta)+(y[u==j])*(X[u==j,] %*% beta+pro_theta))-taus/2*pro_theta^2-
sum(-(E[u==j])*exp(X[u==j,] %*% beta+theta[j])+(y[u==j])*(X[u==j,] %*% beta+theta[j]))+taus/2*theta[j]^2
if(runif(1,0,1)<exp(MHrate)){
theta[j]<-pro_theta
accepttheta=accepttheta+1
}
}
}
######################
### update r ###
######################
for (k in 1:nq){
pro_r=r;pro_Fr=F_r;pro_u=u
pro_r[k]=rnorm(1,r[k],1.8)
pro_Fr[k]=pnorm(pro_r[k],0,sqrt(Vr[k,k]))
pro_u[k]=makeu(pro_Fr[k],probs)
MH=dmvnorm(pro_r,mean=rep(0, nq),sigma=Vr,log=T)+dpois(y[k],E[k]*exp(as.numeric(X[k,]%*%beta)+theta[pro_u[k]]),log=T)-
dmvnorm(r,mean=rep(0, nq),sigma=Vr,log=T)-dpois(y[k],E[k]*exp(as.numeric(X[k,]%*%beta)+theta[u[k]]),log=T)
if(runif(1,0,1)<exp(MH)){
r[k]=pro_r[k]
F_r[k]=pro_Fr[k]
u[k]=pro_u[k]
acceptr=acceptr+1
}
}
######################
### update v ###
######################
for (j in 1:n.atoms){
pro_v=v
pro_v[j]<-rnorm(1,v[j],0.06)
while (pro_v[j]<0 || pro_v[j]>1) {pro_v[j]<-rnorm(1,v[j],0.06)}
pro_probs=makeprobs(pro_v)
pro_u=makeu(F_r,pro_probs)
MH=log(dbeta(pro_v[j],1,alpha))+sum(dpois(y,E*exp(X%*%beta+theta[pro_u]),log=T))-
log(dbeta(v[j],1,alpha))-sum(dpois(y,E*exp(X%*%beta+theta[u]),log=T))
if(runif(1,0,1)<exp(MH)){
v[j]=pro_v[j]
probs=pro_probs
u=pro_u
acceptv=acceptv+1
}
}
######################
### update taus ###
######################
taus=rgamma(1,shape=1/2*n.atoms+c,rate=sum(theta^2)/2+d2)
######################
### update rho ###
######################
pro_eta1 = rnorm(1, eta[1], 0.4)
pro_eta2 = rnorm(1, eta[2], 0.4)
pro_eta3 = rnorm(1, eta[3], 0.4)
pro_eta4 = rnorm(1, eta[4], 0.4)
pro_eta = c(pro_eta1, pro_eta2, pro_eta3, pro_eta4)
pro_rho = exp(pro_eta)/(1+exp(pro_eta))
pro_Q = Dinv_new(Rho = pro_rho, n, cn, ns, udnei, q)
pro_invQ1 = solve(pro_Q[[1]])
pro_invQ2 = solve(pro_Q[[2]])
pro_invQ3 = solve(pro_Q[[3]])
pro_invQ4 = solve(pro_Q[[4]])
kprod = kronecker(A, diag(n))
pro_invQ = as.matrix(bdiag(bdiag(pro_invQ1, pro_invQ2), bdiag(pro_invQ3, pro_invQ4)))
pro_Vr = kprod %*% pro_invQ %*% t(kprod)
MH = dmvnorm(r,mean=rep(0, nq),sigma=as.matrix(forceSymmetric(pro_Vr)),log=T) +
log(pro_rho[1]) + log(1-pro_rho[1]) + log(pro_rho[2]) + log(1-pro_rho[2]) +
log(pro_rho[3]) + log(1-pro_rho[3]) + log(pro_rho[4]) + log(1-pro_rho[4]) -
dmvnorm(r,mean=rep(0, nq),sigma=Vr,log=T) - log(rho[1]) - log(1-rho[1]) - log(rho[2]) - log(1-rho[2]) -
log(rho[3]) - log(1-rho[3]) - log(rho[4]) - log(1-rho[4])
if(runif(1,0,1)<exp(MH)){
eta = pro_eta
rho = pro_rho
Vr = as.matrix(forceSymmetric(pro_Vr))
acceptrho=acceptrho+1
}
######################
### update A ###
######################
pro_A = matrix(0, q, q)
for(i in 1:q){
for(j in 1:i){
if(j == i){
pro_A[i, j] = exp(rnorm(1, log(A[i, j]), 0.04))
}else{
pro_A[i, j] = rnorm(1, A[i, j], 0.03)
}
}
}
Q = Dinv_new(Rho = rho, n, cn, ns, udnei, q=4)
invQ1 = solve(Q[[1]])
invQ2 = solve(Q[[2]])
invQ3 = solve(Q[[3]])
invQ4 = solve(Q[[4]])
Sigma = A %*% t(A)
pro_kprod = kronecker(pro_A, diag(n))
invQ = as.matrix(bdiag(bdiag(invQ1, invQ2), bdiag(invQ3, invQ4)))
pro_Vr = pro_kprod %*% as.matrix(forceSymmetric(invQ)) %*% t(pro_kprod)
pro_Sigma = pro_A %*% t(pro_A)
lpA = -(nu+4)/2*logdet(Sigma) - 1/2*sum(diag(nu*R%*%solve(Sigma))) + log(Jacob_A(A)) + sum(log(diag(A)))
pro_lpA = -(nu+4)/2*logdet(pro_Sigma) - 1/2*sum(diag(nu*R%*%solve(pro_Sigma))) + log(Jacob_A(pro_A)) + sum(log(diag(pro_A)))
MH = dmvnorm(r,mean=rep(0, nq),sigma=as.matrix(forceSymmetric(pro_Vr)),log=T) + pro_lpA -
dmvnorm(r,mean=rep(0, nq),sigma=Vr,log=T) - lpA
if(runif(1,0,1)<exp(MH)){
A = pro_A
Vr = as.matrix(forceSymmetric(pro_Vr))
acceptA=acceptA+1
}
######################
### record samples ###
######################
keeptheta[iter,] = theta
keepphi[iter,] = theta[u]
keepA[iter,] = as.vector(A)[-c(5,9,10,13:15)]
keepbeta[iter,]=beta
keeptaus[iter]=taus
keeprho1[iter]=rho[1]
keeprho2[iter]=rho[2]
keeprho3[iter]=rho[3]
keeprho4[iter]=rho[4]
keepu[iter,]=u
keepv[iter,]=v
keepr[iter,]=r
keepFr[iter,]=F_r
# cat("iteration = ", i, "acceptance rate of r = ", acceptr/(n*i),"acceptance rate of v = ",acceptv/(n.atoms*i), "\n")
}
list(beta=keepbeta[burn:runs,], A = keepA[burn:runs,],phi=keepphi[burn:runs,],theta=keeptheta[burn:runs,],u=keepu[burn:runs,],
v=keepv[burn:runs,],r=keepr[burn:runs,],Fr=keepFr[burn:runs,], taus=keeptaus[burn:runs],
rho1=keeprho1[burn:runs], rho2=keeprho2[burn:runs], rho3=keeprho3[burn:runs], rho4=keeprho4[burn:runs])
}
set.seed(123)
mcmc_samples=ARDP_joint_diff(y=Y, x1=as.matrix(X1[,1]), x2=as.matrix(X2[,1]), x3 = as.matrix(X3[,1]), x4 = as.matrix(X4[,1]),
X = X, E=E, Minc, alpha=1, q = 4, n.atoms=15, runs=30000, burn=20000)
## [1] 30000
## [1] 0.2543418
## [1] 0.1761684
## [1] 0.2381413
## [1] 0.1907908
## [1] 0.2749669
## [1] 0.209007
## [1] 0.2755759
## [1] 0.351045
## [1] 0.2270742
#Estimates of parameters
sample.mcmc = cbind(mcmc_samples$taus, mcmc_samples$rho1, mcmc_samples$rho2, mcmc_samples$rho3, mcmc_samples$rho4)
phis <- mcmc_samples$phi
phis1 <- phis[,1:58][,order(final_perm)]
phis2 <- phis[,59:116][,order(final_perm)]
phis3 <- phis[,117:174][,order(final_perm)]
phis4 <- phis[,175:232][,order(final_perm)]
phis_origin <- cbind(phis1, phis2, phis3, phis4)
We calculate posterior probabilities: \(P(\phi_{id} \neq \phi_{jd}), \quad i \sim j\)
vij_samples1 <- t(apply(phis1, 1, function(x){
diff_b1 <- NULL
for(i in 1:nrow(neighbor_list0)){
if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]]){
diff_b1 <- c(diff_b1, 1)
}else{
diff_b1 <- c(diff_b1, 0)
}
}
return(diff_b1)
}))
vij_samples2 <- t(apply(phis2, 1, function(x){
diff_b1 <- NULL
for(i in 1:nrow(neighbor_list0)){
if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]]){
diff_b1 <- c(diff_b1, 1)
}else{
diff_b1 <- c(diff_b1, 0)
}
}
return(diff_b1)
}))
vij_samples3 <- t(apply(phis3, 1, function(x){
diff_b1 <- NULL
for(i in 1:nrow(neighbor_list0)){
if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]]){
diff_b1 <- c(diff_b1, 1)
}else{
diff_b1 <- c(diff_b1, 0)
}
}
return(diff_b1)
}))
vij_samples4 <- t(apply(phis4, 1, function(x){
diff_b1 <- NULL
for(i in 1:nrow(neighbor_list0)){
if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]]){
diff_b1 <- c(diff_b1, 1)
}else{
diff_b1 <- c(diff_b1, 0)
}
}
return(diff_b1)
}))
# probabilities: P(phi_id != phi_jd), i~j
pvij1 <- apply(vij_samples1, 2, mean)
pvij2 <- apply(vij_samples2, 2, mean)
pvij3 <- apply(vij_samples3, 2, mean)
pvij4 <- apply(vij_samples4, 2, mean)
names(pvij1) <- neighbor_name
names(pvij2) <- neighbor_name
names(pvij3) <- neighbor_name
names(pvij4) <- neighbor_name
a. Top \(75\) pairs of neighboring counties with significant boundary effect
name_diff1 <- names(pvij1[order(pvij1,decreasing = T)][1:75])
name_diff2 <- names(pvij2[order(pvij2,decreasing = T)][1:75])
name_diff3 <- names(pvij3[order(pvij3,decreasing = T)][1:75])
name_diff4 <- names(pvij4[order(pvij4,decreasing = T)][1:75])
# Table for top 75 pairs of neighbors
name_diff = cbind(name_diff1, name_diff2, name_diff3, name_diff4)
colnames(name_diff) = c("Lung", "Esophageal", "Layrnx", "Coloretal")
knitr::kable(name_diff, caption = "Names of adjacent counties that have significant boundary effect (top 75) from the MDAGAR model for each cancer individually.")
| Lung | Esophageal | Layrnx | Coloretal |
|---|---|---|---|
| alameda, contra costa | butte, sutter | merced, tuolumne | alameda, contra costa |
| alameda, san joaquin | calaveras, san joaquin | mariposa, merced | amador, san joaquin |
| alameda, santa clara | calaveras, stanislaus | napa, yolo | butte, sutter |
| amador, el dorado | el dorado, sacramento | lake, yolo | calaveras, san joaquin |
| amador, sacramento | fresno, inyo | san joaquin, santa clara | contra costa, san joaquin |
| amador, san joaquin | inyo, kern | san mateo, santa clara | fresno, inyo |
| butte, colusa | inyo, san bernardino | inyo, kern | inyo, kern |
| butte, sutter | inyo, tulare | madera, tuolumne | inyo, tulare |
| calaveras, san joaquin | kern, san luis obispo | sacramento, yolo | kern, los angeles |
| calaveras, stanislaus | kings, san luis obispo | fresno, inyo | kern, san bernardino |
| colusa, glenn | lake, mendocino | calaveras, san joaquin | kern, san luis obispo |
| colusa, lake | lake, napa | inyo, san bernardino | kern, santa barbara |
| contra costa, sacramento | lake, yolo | calaveras, stanislaus | kern, ventura |
| contra costa, solano | los angeles, ventura | inyo, tulare | kings, san luis obispo |
| el dorado, sacramento | madera, mariposa | santa clara, stanislaus | lake, yolo |
| fresno, inyo | madera, tuolumne | orange, san diego | madera, mariposa |
| fresno, madera | mariposa, merced | madera, mariposa | mariposa, merced |
| fresno, tulare | merced, tuolumne | orange, riverside | merced, tuolumne |
| glenn, lake | mono, tuolumne | contra costa, sacramento | monterey, san luis obispo |
| imperial, riverside | monterey, san luis obispo | napa, solano | napa, yolo |
| imperial, san diego | napa, yolo | lassen, plumas | san luis obispo, santa barbara |
| inyo, kern | orange, riverside | kern, san luis obispo | solano, yolo |
| inyo, mono | orange, san diego | lassen, shasta | sacramento, yolo |
| inyo, san bernardino | san joaquin, santa clara | merced, stanislaus | inyo, san bernardino |
| inyo, tulare | san mateo, santa clara | el dorado, sacramento | lassen, shasta |
| kern, san luis obispo | stanislaus, tuolumne | colusa, glenn | santa clara, stanislaus |
| kern, santa barbara | napa, solano | butte, colusa | monterey, santa cruz |
| kern, ventura | merced, stanislaus | santa clara, santa cruz | san mateo, santa clara |
| kings, san luis obispo | kern, santa barbara | lassen, modoc | calaveras, stanislaus |
| lake, mendocino | lassen, shasta | kings, san luis obispo | napa, solano |
| lake, napa | santa clara, stanislaus | stanislaus, tuolumne | butte, yuba |
| lake, sonoma | kern, ventura | mono, tuolumne | colusa, lake |
| lake, yolo | lassen, plumas | mariposa, stanislaus | orange, riverside |
| lassen, plumas | butte, colusa | sutter, yolo | colusa, glenn |
| lassen, shasta | sutter, yolo | lake, sonoma | madera, tuolumne |
| los angeles, orange | colusa, glenn | humboldt, siskiyou | sacramento, san joaquin |
| los angeles, ventura | solano, yolo | colusa, lake | san joaquin, santa clara |
| madera, mariposa | placer, sacramento | inyo, mono | alameda, santa clara |
| madera, tuolumne | lake, sonoma | amador, san joaquin | inyo, mono |
| mariposa, merced | mariposa, stanislaus | butte, sutter | mariposa, stanislaus |
| mariposa, stanislaus | los angeles, orange | plumas, sierra | san francisco, san mateo |
| merced, stanislaus | sacramento, yolo | riverside, san bernardino | el dorado, sacramento |
| merced, tuolumne | colusa, lake | solano, yolo | merced, stanislaus |
| mono, tuolumne | inyo, mono | sacramento, san joaquin | amador, sacramento |
| monterey, san luis obispo | lassen, modoc | alpine, mono | humboldt, mendocino |
| napa, solano | mendocino, trinity | madera, merced | butte, colusa |
| napa, yolo | amador, san joaquin | fresno, madera | placer, sutter |
| orange, riverside | alameda, santa clara | nevada, sierra | colusa, yolo |
| orange, san bernardino | madera, merced | san benito, santa cruz | lassen, plumas |
| orange, san diego | santa clara, santa cruz | fresno, mono | lassen, modoc |
| riverside, san bernardino | alpine, tuolumne | lake, napa | plumas, yuba |
| sacramento, san joaquin | mendocino, tehama | alameda, stanislaus | monterey, san benito |
| sacramento, yolo | san luis obispo, santa barbara | alameda, san joaquin | riverside, san diego |
| san francisco, san mateo | nevada, sierra | alpine, amador | los angeles, orange |
| san joaquin, santa clara | orange, san bernardino | modoc, siskiyou | orange, san bernardino |
| san luis obispo, santa barbara | glenn, lake | placer, sutter | orange, san diego |
| san mateo, santa clara | fresno, madera | alpine, tuolumne | sutter, yolo |
| santa clara, stanislaus | fresno, mono | kings, monterey | humboldt, trinity |
| solano, yolo | alpine, amador | san luis obispo, santa barbara | alpine, mono |
| stanislaus, tuolumne | humboldt, mendocino | lake, mendocino | mono, tuolumne |
| sutter, yolo | placer, sutter | placer, sacramento | fresno, mono |
| amador, calaveras | kern, monterey | alpine, calaveras | plumas, sierra |
| alpine, amador | sierra, yuba | mendocino, trinity | lake, sonoma |
| humboldt, trinity | alameda, contra costa | sierra, yuba | nevada, yuba |
| mendocino, trinity | kings, monterey | los angeles, ventura | madera, merced |
| butte, yuba | plumas, sierra | orange, san bernardino | alpine, el dorado |
| madera, merced | sutter, yuba | mendocino, tehama | mariposa, tuolumne |
| fresno, kings | alameda, san joaquin | sutter, yuba | humboldt, siskiyou |
| lassen, modoc | los angeles, san bernardino | alpine, el dorado | stanislaus, tuolumne |
| mendocino, sonoma | nevada, placer | colusa, yolo | del norte, humboldt |
| placer, sacramento | del norte, siskiyou | humboldt, trinity | amador, el dorado |
| alameda, stanislaus | alpine, calaveras | kern, monterey | alpine, amador |
| san mateo, santa cruz | del norte, humboldt | del norte, siskiyou | alpine, tuolumne |
| fresno, monterey | colusa, yolo | monterey, san luis obispo | nevada, sierra |
| mendocino, tehama | amador, el dorado | monterey, san benito | placer, sacramento |
Number \(1-61\) for lung cancer, \(1-26\) for esophageal cancer and \(1-22\) for colorectal cancer are ranked by initial letters with \(P(\phi_{id} \neq \phi_{jd} | \bm{y}) = 1\).
b. Estimated FDR curves
T_edge1 = seq(0, 135, 1)[-1]
threshold1 = sort(pvij1, decreasing = TRUE)[T_edge1]
threshold2 = sort(pvij2, decreasing = TRUE)[T_edge1]
threshold3 = sort(pvij3, decreasing = TRUE)[T_edge1]
threshold4 = sort(pvij4, decreasing = TRUE)[T_edge1]
FDR_est1 <- rep(0, length(T_edge1))
FDR_est2 <- rep(0, length(T_edge1))
FDR_est3 <- rep(0, length(T_edge1))
FDR_est4 <- rep(0, length(T_edge1))
for(i in 1:length(threshold1)){
th1 <- threshold1[i]
est_diff1 <- as.numeric(pvij1 >= th1)
FDR_est1[i] <- sum((1-pvij1) * est_diff1) / sum(est_diff1)
th2 <- threshold2[i]
est_diff2 <- as.numeric(pvij2 >= th2)
FDR_est2[i] <- sum((1-pvij2) * est_diff2) / sum(est_diff2)
th3 <- threshold3[i]
est_diff3 <- as.numeric(pvij3 >= th3)
FDR_est3[i] <- sum((1-pvij3) * est_diff3) / sum(est_diff3)
th4 <- threshold4[i]
est_diff4 <- as.numeric(pvij4 >= th4)
FDR_est4[i] <- sum((1-pvij4) * est_diff4) / sum(est_diff4)
}
plot(FDR_est1,type="l", lty = 1, xlab="Number of edges selected", ylab = "Estimated FDR", ylim=c(0,0.5))
lines(FDR_est2,type="l", lty = 2)
lines(FDR_est3,type="l", lty = 3)
lines(FDR_est4,type="l", lty = 4)
legend("topleft", legend=c("Lung", "Esophageal", "Layrnx", "Colorectal"),
lty=1:4, cex=0.7)
c. Difference boundary detection
By setting threshold for FDR as \(0.025\), we get difference boundaries detected for each cancer individually.
# Libraries for map with boundaires
if (!require(gpclib)) install.packages("gpclib", type="source")
gpclibPermit()
## [1] TRUE
library(plyr)
ca.poly@data$id <- rownames(ca.poly@data)
ca.poly.f <- fortify(ca.poly, region = "id")
ca.poly.df <- join(ca.poly.f, ca.poly@data, by = "id")
# Correct boundary data for the map
path=list()
for(i in 1: nrow(neighbor_list0)){
r1 <- ca.poly.df[ca.poly.df$id %in% neighbor_list0[i,1],1:2]
r2 <- ca.poly.df[ca.poly.df$id %in% neighbor_list0[i,2],1:2]
edges <- generics::intersect(r1, r2)
path[[i]] = edges
}
path[[2]][nrow(path[[2]])+1,] <- path[[2]][1,]
path[[2]] <- path[[2]][-1,]
path[[11]][nrow(path[[11]])+1,] <- path[[11]][1,]
path[[11]] <- path[[11]][-1,]
path[[19]][nrow(path[[19]])+1,] <- path[[19]][1,]
path[[19]] <- path[[19]][-1,]
path[[25]][nrow(path[[25]])+1,] <- path[[25]][1,]
path[[25]] <- path[[25]][-1,]
path[[27]][nrow(path[[27]])+1,] <- path[[27]][1,]
path[[27]] <- path[[27]][-1,]
path[[31]][nrow(path[[31]])+1,] <- path[[31]][1,]
path[[31]] <- path[[31]][-1,]
path[[39]][nrow(path[[39]])+1,] <- path[[39]][1,]
path[[39]] <- path[[39]][-1,]
path[[43]][nrow(path[[43]])+1,] <- path[[43]][1,]
path[[43]] <- path[[43]][-1,]
path[[46]][nrow(path[[46]])+1,] <- path[[46]][1,]
path[[46]] <- path[[46]][-1,]
path[[64]][nrow(path[[64]])+1,] <- path[[64]][1,]
path[[64]] <- path[[64]][-1,]
path[[75]][nrow(path[[75]])+1,] <- path[[75]][1,]
path[[75]] <- path[[75]][-1,]
path[[76]][nrow(path[[76]])+1,] <- path[[76]][1,]
path[[76]] <- path[[76]][-1,]
path[[81]][nrow(path[[81]])+1,] <- path[[81]][1,]
path[[81]] <- path[[81]][-1,]
path[[100]][nrow(path[[100]])+1,] <- path[[100]][1,]
path[[100]] <- path[[100]][-1,]
path[[125]][nrow(path[[125]])+1,] <- path[[125]][1,]
path[[125]] <- path[[125]][-1,]
path[[130]][nrow(path[[130]])+1,] <- path[[130]][1,]
path[[130]] <- path[[130]][-1,]
path[[133]][nrow(path[[133]])+1,] <- path[[133]][1,]
path[[133]] <- path[[133]][-1,]
# Set threshold for FDR: 0.025
alpha_n = 0.025
T1 = sum(FDR_est1<=alpha_n)
T2 = sum(FDR_est2<=alpha_n)
T3 = sum(FDR_est3<=alpha_n)
T4 = sum(FDR_est4<=alpha_n)
# Difference boundary for each cancer with FDR <= 0.025
est_diff1 <- as.numeric(pvij1 >= threshold1[T1])
est_diff1_1 <- as.numeric(pvij1 >= threshold1[75])
est_diff2 <- as.numeric(pvij2 >= threshold2[T2])
est_diff3 <- as.numeric(pvij3 >= threshold3[T3])
est_diff4 <- as.numeric(pvij4 >= threshold4[T4])
# Lung cancer
edge_plot1 <- ggplot() +
geom_polygon(data = ca.poly.df, aes(long, lat, group = group),
color = "black",
fill = "white"
)
for(i in which(est_diff1 == 1)){
edge_plot1 = edge_plot1 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "dodgerblue") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_blank(),
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
plot.title = element_text(size=16, face="bold",hjust = 0.5)) +
ggtitle(paste("Lung (T = ", T1, ")", sep=""))
}
for(i in which(est_diff1_1 == 1)){
edge_plot1 = edge_plot1 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red")
}
# Esophageal cancer
edge_plot2 <- ggplot() +
geom_polygon(data = ca.poly.df, aes(long, lat, group = group),
color = "black",
fill = "white"
)
for(i in which(est_diff2 == 1)){
edge_plot2 = edge_plot2 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_blank(),
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
plot.title = element_text(size=16, face="bold",hjust = 0.5)) +
ggtitle(paste("Esophageal (T = ", T2, ")", sep=""))
}
# Larynx cancer
edge_plot3 <- ggplot() +
geom_polygon(data = ca.poly.df, aes(long, lat, group = group),
color = "black",
fill = "white"
)
for(i in which(est_diff3 == 1)){
edge_plot3 = edge_plot3 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_blank(),
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
plot.title = element_text(size=16, face="bold",hjust = 0.5)) +
ggtitle(paste("Larynx (T = ", T3, ")", sep=""))
}
# Colorectal cancer
edge_plot4 <- ggplot() +
geom_polygon(data = ca.poly.df, aes(long, lat, group = group),
color = "black",
fill = "white"
)
for(i in which(est_diff4 == 1)){
edge_plot4 = edge_plot4 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_blank(),
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
plot.title = element_text(size=16, face="bold",hjust = 0.5)) +
ggtitle(paste("Colorectal (T = ", T4, ")", sep=""))
}
text <- "Figure 2. Difference boundaries (highlighted in red and blue) detected by MDAGAR in SIR map for four cancers individually.\n The blue boundaries are those not included in top 75 edges for lung cancer. The values in brackets are the number of difference boundaries detected"
tgrob <- text_grob(text,size = 20)
plot_0 <- as_ggplot(tgrob) + theme(plot.margin = margin(0,3,0,27, "cm"))
ggarrange(plot_0, NULL, edge_plot1, edge_plot2, edge_plot3, edge_plot4, nrow = 3, ncol = 2, heights = c(1,5,5))
We calculate posterior probabilities: \(P(\phi_{id} \neq \phi_{jd'}, \phi_{id'} \neq \phi_{jd}),\quad i \sim j, \quad i < j,\quad d \neq d'\)
vij_samples12 <- t(apply(cbind(phis1,phis2), 1, function(x){
diff_b1 <- NULL
for(i in 1:nrow(neighbor_list0)){
if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]+58] &
x[neighbor_list0[i,1]+58] != x[neighbor_list0[i,2]]){
diff_b1 <- c(diff_b1, 1)
}else{
diff_b1 <- c(diff_b1, 0)
}
}
return(diff_b1)
}))
vij_samples13 <- t(apply(cbind(phis1,phis3), 1, function(x){
diff_b1 <- NULL
for(i in 1:nrow(neighbor_list0)){
if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]+58] &
x[neighbor_list0[i,1]+58] != x[neighbor_list0[i,2]]){
diff_b1 <- c(diff_b1, 1)
}else{
diff_b1 <- c(diff_b1, 0)
}
}
return(diff_b1)
}))
vij_samples14 <- t(apply(cbind(phis1,phis4), 1, function(x){
diff_b1 <- NULL
for(i in 1:nrow(neighbor_list0)){
if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]+58] &
x[neighbor_list0[i,1]+58] != x[neighbor_list0[i,2]]){
diff_b1 <- c(diff_b1, 1)
}else{
diff_b1 <- c(diff_b1, 0)
}
}
return(diff_b1)
}))
vij_samples23 <- t(apply(cbind(phis2,phis3), 1, function(x){
diff_b1 <- NULL
for(i in 1:nrow(neighbor_list0)){
if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]+58] &
x[neighbor_list0[i,1]+58] != x[neighbor_list0[i,2]]){
diff_b1 <- c(diff_b1, 1)
}else{
diff_b1 <- c(diff_b1, 0)
}
}
return(diff_b1)
}))
vij_samples24 <- t(apply(cbind(phis2,phis4), 1, function(x){
diff_b1 <- NULL
for(i in 1:nrow(neighbor_list0)){
if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]+58] &
x[neighbor_list0[i,1]+58] != x[neighbor_list0[i,2]]){
diff_b1 <- c(diff_b1, 1)
}else{
diff_b1 <- c(diff_b1, 0)
}
}
return(diff_b1)
}))
vij_samples34 <- t(apply(cbind(phis3,phis4), 1, function(x){
diff_b1 <- NULL
for(i in 1:nrow(neighbor_list0)){
if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]+58] &
x[neighbor_list0[i,1]+58] != x[neighbor_list0[i,2]]){
diff_b1 <- c(diff_b1, 1)
}else{
diff_b1 <- c(diff_b1, 0)
}
}
return(diff_b1)
}))
# probabilities: P(phi_id != phi_jd', phi_id' != phi_jd), i ~ j, i < j, d != d'
pvij12 <- apply(vij_samples12, 2, mean)
pvij13 <- apply(vij_samples13, 2, mean)
pvij14 <- apply(vij_samples14, 2, mean)
pvij23 <- apply(vij_samples23, 2, mean)
pvij24 <- apply(vij_samples24, 2, mean)
pvij34 <- apply(vij_samples34, 2, mean)
# Estimated FDR
threshold12 = sort(pvij12, decreasing = TRUE)[T_edge1]
threshold13 = sort(pvij13, decreasing = TRUE)[T_edge1]
threshold14 = sort(pvij14, decreasing = TRUE)[T_edge1]
threshold23 = sort(pvij23, decreasing = TRUE)[T_edge1]
threshold24 = sort(pvij24, decreasing = TRUE)[T_edge1]
threshold34 = sort(pvij34, decreasing = TRUE)[T_edge1]
FDR_est12 <- rep(0, length(T_edge1))
FDR_est13 <- rep(0, length(T_edge1))
FDR_est14 <- rep(0, length(T_edge1))
FDR_est23 <- rep(0, length(T_edge1))
FDR_est24 <- rep(0, length(T_edge1))
FDR_est34 <- rep(0, length(T_edge1))
for(i in 1:length(threshold1)){
th12 <- threshold12[i]
est_diff12<- as.numeric(pvij12 >= th12)
FDR_est12[i] <- sum((1-pvij12) * est_diff12) / sum(est_diff12)
th13 <- threshold13[i]
est_diff13 <- as.numeric(pvij13 >= th13)
FDR_est13[i] <- sum((1-pvij13) * est_diff13) / sum(est_diff13)
th14 <- threshold14[i]
est_diff14 <- as.numeric(pvij14 >= th14)
FDR_est14[i] <- sum((1-pvij14) * est_diff14) / sum(est_diff14)
th23 <- threshold23[i]
est_diff23 <- as.numeric(pvij23 >= th23)
FDR_est23[i] <- sum((1-pvij23) * est_diff23) / sum(est_diff23)
th24 <- threshold24[i]
est_diff24 <- as.numeric(pvij24 >= th24)
FDR_est24[i] <- sum((1-pvij24) * est_diff24) / sum(est_diff24)
th34 <- threshold34[i]
est_diff34 <- as.numeric(pvij34 >= th34)
FDR_est34[i] <- sum((1-pvij34) * est_diff34) / sum(est_diff34)
}
Use the same threshold as above and identify cross-disease boundaries
alpha_n1 = 0.025
T12=sum(FDR_est12<=alpha_n1)
T13=sum(FDR_est13<=alpha_n1)
T14=sum(FDR_est14<=alpha_n1)
T23=sum(FDR_est23<=alpha_n1)
T24=sum(FDR_est24<=alpha_n1)
T34=sum(FDR_est34<=alpha_n1)
est_diff12 <- as.numeric(pvij12 >= threshold12[T12])
est_diff13 <- as.numeric(pvij13 >= threshold13[T13])
est_diff14 <- as.numeric(pvij14 >= threshold23[T14])
est_diff23 <- as.numeric(pvij23 >= threshold23[T23])
est_diff24 <- as.numeric(pvij24 >= threshold24[T24])
est_diff34 <- as.numeric(pvij34 >= threshold34[T34])
edge_plot12 <- ggplot() +
geom_polygon(data = ca.poly.df, aes(long, lat, group = group),
color = "black",
fill = "white"
)
for(i in which(est_diff12==1)){
edge_plot12 = edge_plot12 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_blank(),
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
plot.title = element_text(size=14, face="bold",hjust = 0.5)) +
ggtitle(paste("Lung vs. Esophageal (T = ", T12, ")", sep=""))
}
edge_plot13 <- ggplot() +
geom_polygon(data = ca.poly.df, aes(long, lat, group = group),
color = "black",
fill = "white"
)
for(i in which(est_diff13==1)){
edge_plot13 = edge_plot13 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_blank(),
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
plot.title = element_text(size=14, face="bold",hjust = 0.5)) +
ggtitle(paste("Lung vs. Layrnx (T = ", T13, ")", sep=""))
}
edge_plot14 <- ggplot() +
geom_polygon(data = ca.poly.df, aes(long, lat, group = group),
color = "black",
fill = "white"
)
for(i in which(est_diff14==1)){
edge_plot14 = edge_plot14 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_blank(),
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
plot.title = element_text(size=14, face="bold",hjust = 0.5)) +
ggtitle(paste("Lung vs. Colorectal (T = ", T14, ")", sep=""))
}
edge_plot23 <- ggplot() +
geom_polygon(data = ca.poly.df, aes(long, lat, group = group),
color = "black",
fill = "white"
)
for(i in which(est_diff23==1)){
edge_plot23 = edge_plot23 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_blank(),
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
plot.title = element_text(size=14, face="bold",hjust = 0.5)) +
ggtitle(paste("Esophageal vs. Layrnx (T = ", T23, ")", sep=""))
}
edge_plot24 <- ggplot() +
geom_polygon(data = ca.poly.df, aes(long, lat, group = group),
color = "black",
fill = "white"
)
for(i in which(est_diff24==1)){
edge_plot24 = edge_plot24 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_blank(),
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
plot.title = element_text(size=14, face="bold",hjust = 0.5)) +
ggtitle(paste("Esophageal vs. Colorectal (T = ", T24, ")", sep=""))
}
edge_plot34 <- ggplot() +
geom_polygon(data = ca.poly.df, aes(long, lat, group = group),
color = "black",
fill = "white"
)
for(i in which(est_diff34==1)){
edge_plot34 = edge_plot34 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_blank(),
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
plot.title = element_text(size=14, face="bold",hjust = 0.5)) +
ggtitle(paste("Larynx vs. Colorectal (T = ", T34, ")", sep=""))
}
text <- "Figure 4. Mutual cross-cancer difference boundaries (highlighted in red) detected by MDAGAR for each pair of cancer in SIR map. \n The values in brackets are the number of difference boundaries detected"
tgrob <- text_grob(text,size = 23)
plot_0 <- as_ggplot(tgrob) + theme(plot.margin = margin(0,0,0,33, "cm"))
ggarrange(plot_0, NULL, NULL, edge_plot12, edge_plot13, edge_plot14,
edge_plot23, edge_plot24, edge_plot34, nrow = 3, ncol = 3, heights = c(1, 5, 5))
Y_rep1 <- matrix(0, nrow = n, ncol = 10000)
Y_rep2 <- matrix(0, nrow = n, ncol = 10000)
Y_rep3 <- matrix(0, nrow = n, ncol = 10000)
Y_rep4 <- matrix(0, nrow = n, ncol = 10000)
for(i in 1:10000){
beta1 <- mcmc_samples$beta[i,1]
beta2 <- mcmc_samples$beta[i,2]
beta3 <- mcmc_samples$beta[i,3]
beta4 <- mcmc_samples$beta[i,4]
W1_mcmc = mcmc_samples$phi[i,1:58]
W2_mcmc = mcmc_samples$phi[i,59:116]
W3_mcmc = mcmc_samples$phi[i,117:174]
W4_mcmc = mcmc_samples$phi[i,175:232]
Y_rep1[,i] <- rpois(n, E1 * exp(X1[,1] * beta1 + W1_mcmc)) / E1
Y_rep2[,i] <- rpois(n, E2 * exp(X2[,1] * beta2 + W2_mcmc)) / E2
Y_rep3[,i] <- rpois(n, E3 * exp(X3[,1] * beta3 + W3_mcmc)) / E3
Y_rep4[,i] <- rpois(n, E4 * exp(X4[,1] * beta4 + W4_mcmc)) / E4
}
mu_rep1 = rowMeans(Y_rep1)
mu_rep2 = rowMeans(Y_rep2)
mu_rep3 = rowMeans(Y_rep3)
mu_rep4 = rowMeans(Y_rep4)
var_rep1 = rowVars(Y_rep1)
var_rep2 = rowVars(Y_rep2)
var_rep3 = rowVars(Y_rep3)
var_rep4 = rowVars(Y_rep4)
G.latent1 = sum((Y1/E1 - mu_rep1)^2)
P.latent1 = sum(var_rep1)
D.latent1 = G.latent1 + P.latent1
G.latent2 = sum((Y2/E2 - mu_rep2)^2)
P.latent2 = sum(var_rep2)
D.latent2 = G.latent2 + P.latent2
G.latent3 = sum((Y3/E3 - mu_rep3)^2)
P.latent3 = sum(var_rep3)
D.latent3 = G.latent3 + P.latent3
G.latent4 = sum((Y4/E4 - mu_rep4)^2)
P.latent4 = sum(var_rep4)
D.latent4 = G.latent4 + P.latent4
D_DAGAR = c(D.latent1, D.latent2, D.latent3, D.latent4)
D_DAGAR
## [1] 2.503756 26.957530 45.340546 2.328468
sum(D_DAGAR)
## [1] 77.1303
# Matrix for precision matrix of CAR
D=matrix(0,n,n)
for(i in 1:n) D[i,i]=ni[i]
#######################################
### MCAR model with no covariates ###
#######################################
# Metropolis within Gibbs Sampler for MCMC updating
ARDP_joint_diff_car<-function(y=Y, x1=as.matrix(X1[,1]), x2=as.matrix(X2[,1]), x3 = as.matrix(X3[,1]), x4 = as.matrix(X4[,1]),
X = X, E = E, Minc, alpha=1, q = 4, n.atoms=15, runs=10000, burn=1000){
#y: data
#x: covariates
#n.atoms: number of atoms in the mixture dist.
#theta: the theta's (iid from baseline) in the model
#alpha: v~beta(1,alpha)
#u: the index indicator of spatial random effect
#rho: sptatial autocorrelation parameter in DAGAR
#Minc: 0-1 adjacency matrix
#V_r: covariance matrix of joint MDAGAR
#Q: presicion matrix of DAGAR
#r: random effects following DAGAR
#F_r: Marginal CDF of r
#taued: presicion in Baseline of DP for disease d
#taus: precision for theta
nq<-length(y)
n <- nq / q
p<-ncol(X)
y1 <- y[1:n]
y2 <- y[(n+1):(2*n)]
y3 <- y[(2*n+1):(3*n)]
y4 <- y[(3*n+1):(4*n)]
E1 <- E[1:n]
E2 <- E[(n+1):(2*n)]
E3 <- E[(2*n+1):(3*n)]
E4 <- E[(3*n+1):(4*n)]
sigmasq_beta = 10000
keepbeta=matrix(0,runs,p)
keepphi=matrix(0,runs,nq)
keeptheta=matrix(0,runs,n.atoms)
keepA=matrix(0,runs,10)
keepu=matrix(0,runs,nq)
keeprho1=keeprho2=keeprho3=keeprho4=keeptaus = rep(0,runs)
keepv=matrix(0,runs,n.atoms)
keepr=matrix(0,runs,nq)
keepFr=matrix(0,runs,nq)
#initial values
theta=rep(0,n.atoms)
beta1<-rep(0,ncol(x1))
beta2<-rep(0,ncol(x2))
beta3<-rep(0,ncol(x3))
beta4<-rep(0,ncol(x4))
beta = c(beta1, beta2, beta3, beta4)
taus = 1
c=2
d=0.1
d2=1
v<-rep(.1,n.atoms)
probs=makeprobs(v)
#A matrix
rho=c(0.5, 0.5, 0.5, 0.5)
eta = log(rho/(1-rho))
A = matrix(0, q, q)
for(i in 1:q){
for(j in 1:i){
if(j == i){
A[i, j] = exp(rnorm(1))
}else{
A[i, j] = rnorm(1)
}
}
}
invQ1 = solve(D - rho[1] * Minc)
invQ2 = solve(D - rho[2] * Minc)
invQ3 = solve(D - rho[3] * Minc)
invQ4 = solve(D - rho[4] * Minc)
kprod = kronecker(A, diag(n))
invQ = as.matrix(bdiag(bdiag(invQ1, invQ2), bdiag(invQ3, invQ4)))
Vr = as.matrix(forceSymmetric(kprod %*% invQ %*% t(kprod)))
r = rmvnorm(1, rep(0, nq), Vr)
F_r=pnorm(r,0,sqrt(diag(Vr)))
u=makeu(F_r,probs)
phi <- theta[u]
nu = 2
R = 0.1 * diag(q)
acceptr=acceptv=acceptrho=acceptA=0
acceptbeta1 = acceptbeta2=acceptbeta3=acceptbeta4 = 0
accepttheta = 0
count<-afterburn<-0
burn = burn + 1
for(iter in 1:runs){
if(iter == runs){
print(iter)
print(acceptA/(iter-1))
print(acceptr/nq/(iter-1))
print(acceptrho/(iter-1))
print(acceptv/n.atoms/(iter-1))
print(accepttheta/n.atoms/(iter-1))
print(acceptbeta1/(iter-1))
print(acceptbeta2/(iter-1))
print(acceptbeta3/(iter-1))
print(acceptbeta4/(iter-1))
}
######################
### update beta ###
######################
# update beta (intercept only model)
pro_beta1=rnorm(1,beta1,0.02)
MHrate1=sum(-E1*exp(pro_beta1*x1+theta[u][1:n])+y1*(pro_beta1*x1+theta[u][1:n]))-
sum(-E1*exp(beta1*x1+theta[u][1:n])+y1*(beta1*x1+theta[u][1:n]))
if(runif(1,0,1)<exp(MHrate1)){
beta1<-pro_beta1
acceptbeta1=acceptbeta1+1
}
pro_beta2=rnorm(1,beta2,0.05)
MHrate2=sum(-E2*exp(pro_beta2*x2+theta[u][(n+1):(2*n)])+y2*(pro_beta2*x2+theta[u][(n+1):(2*n)]))-
sum(-E2*exp(beta2*x2+theta[u][(n+1):(2*n)])+y2*(beta2*x2+theta[u][(n+1):(2*n)]))
if(runif(1,0,1)<exp(MHrate2)){
beta2<-pro_beta2
acceptbeta2=acceptbeta2+1
}
pro_beta3=rnorm(1,beta3,0.05)
MHrate3=sum(-E3*exp(pro_beta3*x3+theta[u][(2*n+1):(3*n)])+y3*(pro_beta3*x3+theta[u][(2*n+1):(3*n)]))-
sum(-E3*exp(beta3*x3+theta[u][(2*n+1):(3*n)])+y3*(beta3*x3+theta[u][(2*n+1):(3*n)]))
if(runif(1,0,1)<exp(MHrate3)){
beta3<-pro_beta3
acceptbeta3=acceptbeta3+1
}
pro_beta4=rnorm(1,beta4,0.02)
MHrate4=sum(-E4*exp(pro_beta4*x4+theta[u][(3*n+1):(4*n)])+y4*(pro_beta4*x4+theta[u][(3*n+1):(4*n)]))-
sum(-E4*exp(beta4*x4+theta[u][(3*n+1):(4*n)])+y4*(beta4*x4+theta[u][(3*n+1):(4*n)]))
if(runif(1,0,1)<exp(MHrate4)){
beta4<-pro_beta4
acceptbeta4=acceptbeta4+1
}
beta <- c(beta1, beta2, beta3, beta4)
#########################
### update theta ###
#########################
u1 = u[1:n]
u2 = u[(n+1):(2*n)]
u3 = u[(2*n+1):(3*n)]
u4 = u[(3*n+1):(4*n)]
for (j in 1:n.atoms){
count[j]=sum(u==j)
if (count[j]==0){
theta[j]=rnorm(1,0,sqrt(1/taus))
}else{
pro_theta=rnorm(1,theta[j],0.06)
MHrate<-sum(-(E[u==j])*exp(X[u==j,] %*% beta+pro_theta)+(y[u==j])*(X[u==j,] %*% beta+pro_theta))-taus/2*pro_theta^2-
sum(-(E[u==j])*exp(X[u==j,] %*% beta+theta[j])+(y[u==j])*(X[u==j,] %*% beta+theta[j]))+taus/2*theta[j]^2
if(runif(1,0,1)<exp(MHrate)){
theta[j]<-pro_theta
accepttheta=accepttheta+1
}
}
}
######################
### update r ###
######################
for (k in 1:nq){
pro_r=r;pro_Fr=F_r;pro_u=u
pro_r[k]=rnorm(1,r[k],1.7)
pro_Fr[k]=pnorm(pro_r[k],0,sqrt(Vr[k,k]))
pro_u[k]=makeu(pro_Fr[k],probs)
MH=dmvnorm(pro_r,mean=rep(0, nq),sigma=Vr,log=T)+dpois(y[k],E[k]*exp(as.numeric(X[k,]%*%beta)+theta[pro_u[k]]),log=T)-
dmvnorm(r,mean=rep(0, nq),sigma=Vr,log=T)-dpois(y[k],E[k]*exp(as.numeric(X[k,]%*%beta)+theta[u[k]]),log=T)
if(runif(1,0,1)<exp(MH)){
r[k]=pro_r[k]
F_r[k]=pro_Fr[k]
u[k]=pro_u[k]
acceptr=acceptr+1
}
}
######################
### update v ###
######################
for (j in 1:n.atoms){
pro_v=v
pro_v[j]<-rnorm(1,v[j],0.06)
while (pro_v[j]<0 || pro_v[j]>1) {pro_v[j]<-rnorm(1,v[j],0.06)}
pro_probs=makeprobs(pro_v)
pro_u=makeu(F_r,pro_probs)
MH=log(dbeta(pro_v[j],1,alpha))+sum(dpois(y,E*exp(X%*%beta+theta[pro_u]),log=T))-
log(dbeta(v[j],1,alpha))-sum(dpois(y,E*exp(X%*%beta+theta[u]),log=T))
if(runif(1,0,1)<exp(MH)){
v[j]=pro_v[j]
probs=pro_probs
u=pro_u
acceptv=acceptv+1
}
# }
}
######################
### update taus ###
######################
taus=rgamma(1,shape=1/2*n.atoms+c,rate=sum(theta^2)/2+d2)
######################
### update rho ###
######################
pro_eta1 = rnorm(1, eta[1], 1.1)
pro_eta2 = rnorm(1, eta[2], 1.1)
pro_eta3 = rnorm(1, eta[3], 1.1)
pro_eta4 = rnorm(1, eta[4], 1.1)
pro_eta = c(pro_eta1, pro_eta2, pro_eta3, pro_eta4)
pro_rho = exp(pro_eta)/(1+exp(pro_eta))
pro_invQ1 = solve(D - pro_rho[1] * Minc)
pro_invQ2 = solve(D - pro_rho[2] * Minc)
pro_invQ3 = solve(D - pro_rho[3] * Minc)
pro_invQ4 = solve(D - pro_rho[4] * Minc)
kprod = kronecker(A, diag(n))
pro_invQ = as.matrix(bdiag(bdiag(pro_invQ1, pro_invQ2), bdiag(pro_invQ3, pro_invQ4)))
pro_Vr = kprod %*% pro_invQ %*% t(kprod)
MH = dmvnorm(r,mean=rep(0, nq),sigma=as.matrix(forceSymmetric(pro_Vr)),log=T) +
log(pro_rho[1]) + log(1-pro_rho[1]) + log(pro_rho[2]) + log(1-pro_rho[2]) +
log(pro_rho[3]) + log(1-pro_rho[3]) + log(pro_rho[4]) + log(1-pro_rho[4]) -
dmvnorm(r,mean=rep(0, nq),sigma=Vr,log=T) - log(rho[1]) - log(1-rho[1]) - log(rho[2]) - log(1-rho[2]) -
log(rho[3]) - log(1-rho[3]) - log(rho[4]) - log(1-rho[4])
if(runif(1,0,1)<exp(MH)){
eta = pro_eta
rho = pro_rho
Vr = as.matrix(forceSymmetric(pro_Vr))
acceptrho=acceptrho+1
}
######################
### update A ###
######################
pro_A = matrix(0, q, q)
for(i in 1:q){
for(j in 1:i){
if(j == i){
pro_A[i, j] = exp(rnorm(1, log(A[i, j]), 0.05))
}else{
pro_A[i, j] = rnorm(1, A[i, j], 0.05)
}
}
}
invQ1 = solve(D - rho[1] * Minc)
invQ2 = solve(D - rho[2] * Minc)
invQ3 = solve(D - rho[3] * Minc)
invQ4 = solve(D - rho[4] * Minc)
Sigma = A %*% t(A)
pro_kprod = kronecker(pro_A, diag(n))
invQ = as.matrix(bdiag(bdiag(invQ1, invQ2), bdiag(invQ3, invQ4)))
pro_Vr = pro_kprod %*% as.matrix(forceSymmetric(invQ)) %*% t(pro_kprod)
pro_Sigma = pro_A %*% t(pro_A)
lpA = -(nu+4)/2*logdet(Sigma) - 1/2*sum(diag(nu*R%*%solve(Sigma))) + log(Jacob_A(A)) + sum(log(diag(A)))
pro_lpA = -(nu+4)/2*logdet(pro_Sigma) - 1/2*sum(diag(nu*R%*%solve(pro_Sigma))) + log(Jacob_A(pro_A)) + sum(log(diag(pro_A)))
MH = dmvnorm(r,mean=rep(0, nq),sigma=as.matrix(forceSymmetric(pro_Vr)),log=T) + pro_lpA -
dmvnorm(r,mean=rep(0, nq),sigma=Vr,log=T) - lpA
if(runif(1,0,1)<exp(MH)){
A = pro_A
Vr = as.matrix(forceSymmetric(pro_Vr))
acceptA=acceptA+1
}
######################
### record samples ###
######################
keeptheta[iter,] = theta
keepphi[iter,] = theta[u]
keepA[iter,] = as.vector(A)[-c(5,9,10,13:15)]
keepbeta[iter,]=beta
keeptaus[iter]=taus
keeprho1[iter]=rho[1]
keeprho2[iter]=rho[2]
keeprho3[iter]=rho[3]
keeprho4[iter]=rho[4]
keepu[iter,]=u
keepv[iter,]=v
keepr[iter,]=r
keepFr[iter,]=F_r
# cat("iteration = ", i, "acceptance rate of r = ", acceptr/(n*i),"acceptance rate of v = ",acceptv/(n.atoms*i), "\n")
}
list(beta=keepbeta[burn:runs,], A = keepA[burn:runs,],phi=keepphi[burn:runs,],theta=keeptheta[burn:runs,],u=keepu[burn:runs,],
v=keepv[burn:runs,],r=keepr[burn:runs,],Fr=keepFr[burn:runs,], taus=keeptaus[burn:runs],
rho1=keeprho1[burn:runs], rho2=keeprho2[burn:runs], rho3=keeprho3[burn:runs], rho4=keeprho4[burn:runs])
}
set.seed(123)
mcmc_samples=ARDP_joint_diff_car(y=Y, x1=as.matrix(X1[,1]), x2=as.matrix(X2[,1]), x3 = as.matrix(X3[,1]), x4 = as.matrix(X4[,1]),
X = X, E=E, Minc, alpha=1, q = 4, n.atoms=15, runs=30000, burn=20000)
## [1] 30000
## [1] 0.2113737
## [1] 0.1699972
## [1] 0.29911
## [1] 0.1804282
## [1] 0.3057857
## [1] 0.2128738
## [1] 0.2809094
## [1] 0.3533784
## [1] 0.2302743
#Estimates of parameters
sample.mcmc = cbind(mcmc_samples$taus, mcmc_samples$rho1, mcmc_samples$rho2, mcmc_samples$rho3, mcmc_samples$rho4)
phis <- mcmc_samples$phi
phis1 <- phis[,1:58][,order(final_perm)]
phis2 <- phis[,59:116][,order(final_perm)]
phis3 <- phis[,117:174][,order(final_perm)]
phis4 <- phis[,175:232][,order(final_perm)]
phis_origin <- cbind(phis1, phis2, phis3, phis4)
We calculate posterior probabilities: \(P(\phi_{id} \neq \phi_{jd}), \quad i \sim j\)
vij_samples1 <- t(apply(phis1, 1, function(x){
diff_b1 <- NULL
for(i in 1:nrow(neighbor_list0)){
if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]]){
diff_b1 <- c(diff_b1, 1)
}else{
diff_b1 <- c(diff_b1, 0)
}
}
return(diff_b1)
}))
vij_samples2 <- t(apply(phis2, 1, function(x){
diff_b1 <- NULL
for(i in 1:nrow(neighbor_list0)){
if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]]){
diff_b1 <- c(diff_b1, 1)
}else{
diff_b1 <- c(diff_b1, 0)
}
}
return(diff_b1)
}))
vij_samples3 <- t(apply(phis3, 1, function(x){
diff_b1 <- NULL
for(i in 1:nrow(neighbor_list0)){
if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]]){
diff_b1 <- c(diff_b1, 1)
}else{
diff_b1 <- c(diff_b1, 0)
}
}
return(diff_b1)
}))
vij_samples4 <- t(apply(phis4, 1, function(x){
diff_b1 <- NULL
for(i in 1:nrow(neighbor_list0)){
if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]]){
diff_b1 <- c(diff_b1, 1)
}else{
diff_b1 <- c(diff_b1, 0)
}
}
return(diff_b1)
}))
# probabilities: P(phi_id != phi_jd), i~j
pvij1 <- apply(vij_samples1, 2, mean)
pvij2 <- apply(vij_samples2, 2, mean)
pvij3 <- apply(vij_samples3, 2, mean)
pvij4 <- apply(vij_samples4, 2, mean)
names(pvij1) <- neighbor_name
names(pvij2) <- neighbor_name
names(pvij3) <- neighbor_name
names(pvij4) <- neighbor_name
a. Top \(75\) pairs of neighboring counties with significant boundary effect
name_diff1 <- names(pvij1[order(pvij1,decreasing = T)][1:75])
name_diff2 <- names(pvij2[order(pvij2,decreasing = T)][1:75])
name_diff3 <- names(pvij3[order(pvij3,decreasing = T)][1:75])
name_diff4 <- names(pvij4[order(pvij4,decreasing = T)][1:75])
# Table for top 75 pairs of neighbors
name_diff = cbind(name_diff1, name_diff2, name_diff3, name_diff4)
colnames(name_diff) = c("Lung", "Esophageal", "Layrnx", "Coloretal")
knitr::kable(name_diff, caption = "Names of adjacent counties that have significant boundary effect (top 75) from the MDAGAR model for each cancer individually.")
| Lung | Esophageal | Layrnx | Coloretal |
|---|---|---|---|
| alameda, contra costa | alameda, contra costa | alameda, contra costa | alameda, contra costa |
| alameda, san joaquin | alameda, san joaquin | alameda, san joaquin | alameda, san joaquin |
| amador, calaveras | calaveras, san joaquin | calaveras, stanislaus | amador, san joaquin |
| amador, el dorado | calaveras, stanislaus | colusa, yolo | calaveras, stanislaus |
| amador, sacramento | colusa, yolo | kern, san luis obispo | colusa, yolo |
| amador, san joaquin | kern, san luis obispo | kings, san luis obispo | contra costa, san joaquin |
| butte, colusa | kings, san luis obispo | madera, tuolumne | fresno, inyo |
| calaveras, san joaquin | lake, yolo | mariposa, stanislaus | inyo, kern |
| calaveras, stanislaus | los angeles, orange | monterey, san luis obispo | inyo, mono |
| colusa, lake | los angeles, ventura | napa, yolo | inyo, san bernardino |
| colusa, yolo | madera, tuolumne | sacramento, yolo | inyo, tulare |
| contra costa, sacramento | merced, tuolumne | san francisco, san mateo | kern, los angeles |
| contra costa, solano | monterey, san luis obispo | san joaquin, santa clara | kern, san bernardino |
| el dorado, sacramento | napa, yolo | san joaquin, stanislaus | kern, san luis obispo |
| fresno, inyo | sacramento, yolo | san luis obispo, santa barbara | kern, ventura |
| fresno, kings | san francisco, san mateo | solano, yolo | kings, san luis obispo |
| fresno, tulare | san joaquin, santa clara | stanislaus, tuolumne | lake, yolo |
| glenn, lake | san joaquin, stanislaus | sutter, yolo | madera, mariposa |
| imperial, riverside | san luis obispo, santa barbara | madera, mariposa | madera, tuolumne |
| inyo, kern | san mateo, santa clara | mariposa, merced | mariposa, merced |
| inyo, mono | solano, yolo | merced, tuolumne | mariposa, stanislaus |
| inyo, san bernardino | stanislaus, tuolumne | san mateo, santa clara | merced, tuolumne |
| inyo, tulare | sutter, yolo | santa clara, stanislaus | monterey, san luis obispo |
| kern, san luis obispo | madera, mariposa | lake, yolo | napa, yolo |
| kern, tulare | amador, san joaquin | inyo, san bernardino | sacramento, yolo |
| kings, san luis obispo | lake, mendocino | calaveras, san joaquin | san francisco, san mateo |
| lake, mendocino | los angeles, san bernardino | inyo, kern | san joaquin, santa clara |
| lake, napa | mariposa, stanislaus | fresno, inyo | san joaquin, stanislaus |
| lake, sonoma | amador, sacramento | lassen, shasta | san luis obispo, santa barbara |
| lake, yolo | lake, sonoma | amador, san joaquin | solano, yolo |
| lassen, plumas | santa clara, stanislaus | inyo, tulare | stanislaus, tuolumne |
| lassen, shasta | mariposa, merced | lake, sonoma | kern, santa barbara |
| los angeles, orange | mendocino, trinity | colusa, lake | calaveras, san joaquin |
| los angeles, ventura | lassen, shasta | santa clara, santa cruz | santa clara, stanislaus |
| madera, mariposa | colusa, lake | lassen, plumas | amador, sacramento |
| madera, tuolumne | el dorado, sacramento | humboldt, siskiyou | butte, sutter |
| mariposa, merced | lake, napa | inyo, mono | mono, tuolumne |
| mariposa, stanislaus | inyo, san bernardino | humboldt, trinity | monterey, santa cruz |
| mendocino, sonoma | lassen, plumas | amador, sacramento | sutter, yolo |
| merced, tuolumne | mendocino, tehama | lake, napa | san mateo, santa clara |
| mono, tuolumne | inyo, tulare | alameda, santa clara | lassen, shasta |
| monterey, san luis obispo | fresno, inyo | mono, tuolumne | butte, yuba |
| napa, yolo | placer, sacramento | lake, mendocino | sacramento, san joaquin |
| orange, riverside | inyo, kern | mendocino, trinity | alameda, santa clara |
| orange, san bernardino | santa clara, santa cruz | butte, colusa | riverside, san diego |
| riverside, san bernardino | nevada, placer | butte, sutter | orange, riverside |
| sacramento, yolo | kern, ventura | riverside, san bernardino | riverside, san bernardino |
| san francisco, san mateo | del norte, siskiyou | mendocino, tehama | monterey, san benito |
| san joaquin, santa clara | inyo, mono | nevada, placer | colusa, lake |
| san joaquin, stanislaus | glenn, lake | el dorado, sacramento | el dorado, sacramento |
| san luis obispo, santa barbara | orange, riverside | del norte, siskiyou | plumas, yuba |
| san mateo, santa clara | kern, santa barbara | orange, riverside | lassen, plumas |
| santa clara, stanislaus | butte, colusa | plumas, sierra | imperial, riverside |
| solano, yolo | butte, sutter | san benito, santa clara | humboldt, trinity |
| stanislaus, tuolumne | riverside, san bernardino | plumas, yuba | mariposa, tuolumne |
| sutter, yolo | humboldt, mendocino | modoc, siskiyou | merced, stanislaus |
| sacramento, san joaquin | mono, tuolumne | glenn, lake | alpine, mono |
| kern, santa barbara | nevada, sierra | los angeles, ventura | amador, el dorado |
| humboldt, trinity | alpine, tuolumne | alpine, calaveras | nevada, yuba |
| mendocino, trinity | plumas, sierra | alpine, amador | santa clara, santa cruz |
| butte, sutter | del norte, humboldt | glenn, tehama | lake, napa |
| monterey, santa cruz | merced, stanislaus | alpine, tuolumne | alpine, amador |
| merced, stanislaus | modoc, siskiyou | imperial, riverside | colusa, glenn |
| kern, ventura | glenn, tehama | nevada, sierra | butte, colusa |
| alameda, santa clara | alpine, calaveras | alpine, el dorado | alpine, calaveras |
| imperial, san diego | lassen, modoc | el dorado, placer | humboldt, siskiyou |
| butte, yuba | nevada, yuba | merced, stanislaus | plumas, sierra |
| alameda, stanislaus | alpine, el dorado | alpine, mono | fresno, san benito |
| placer, sacramento | shasta, trinity | lassen, modoc | calaveras, tuolumne |
| alpine, amador | alpine, amador | modoc, shasta | lake, mendocino |
| colusa, glenn | alpine, mono | merced, santa clara | lake, sonoma |
| plumas, yuba | humboldt, trinity | contra costa, sacramento | alpine, tuolumne |
| mendocino, tehama | orange, san diego | colusa, glenn | alpine, el dorado |
| del norte, siskiyou | kings, monterey | kings, monterey | modoc, shasta |
| lassen, modoc | napa, solano | napa, solano | napa, solano |
b. Estimated FDR curves
T_edge1 = seq(0, 135, 1)[-1]
threshold1 = sort(pvij1, decreasing = TRUE)[T_edge1]
threshold2 = sort(pvij2, decreasing = TRUE)[T_edge1]
threshold3 = sort(pvij3, decreasing = TRUE)[T_edge1]
threshold4 = sort(pvij4, decreasing = TRUE)[T_edge1]
FDR_est1 <- rep(0, length(T_edge1))
FDR_est2 <- rep(0, length(T_edge1))
FDR_est3 <- rep(0, length(T_edge1))
FDR_est4 <- rep(0, length(T_edge1))
for(i in 1:length(threshold1)){
th1 <- threshold1[i]
est_diff1 <- as.numeric(pvij1 >= th1)
FDR_est1[i] <- sum((1-pvij1) * est_diff1) / sum(est_diff1)
th2 <- threshold2[i]
est_diff2 <- as.numeric(pvij2 >= th2)
FDR_est2[i] <- sum((1-pvij2) * est_diff2) / sum(est_diff2)
th3 <- threshold3[i]
est_diff3 <- as.numeric(pvij3 >= th3)
FDR_est3[i] <- sum((1-pvij3) * est_diff3) / sum(est_diff3)
th4 <- threshold4[i]
est_diff4 <- as.numeric(pvij4 >= th4)
FDR_est4[i] <- sum((1-pvij4) * est_diff4) / sum(est_diff4)
}
plot(FDR_est1,type="l", lty = 1, xlab="Number of edges selected", ylab = "Estimated FDR", ylim=c(0,0.5))
lines(FDR_est2,type="l", lty = 2)
lines(FDR_est3,type="l", lty = 3)
lines(FDR_est4,type="l", lty = 4)
legend("topleft", legend=c("Lung", "Esophageal", "Layrnx", "Colorectal"),
lty=1:4, cex=0.7)
c. Difference boundary detection
By setting FDR threshold the same as MDAGAR (\(0.025\)), we get difference boundaries detected for each cancer individually.
# Set threshold for FDR: 0.025
alpha_n = 0.025
T1 = sum(FDR_est1<=alpha_n)
T2 = sum(FDR_est2<=alpha_n)
T3 = sum(FDR_est3<=alpha_n)
T4 = sum(FDR_est4<=alpha_n)
# Difference boundary for each cancer with FDR <= 0.025
est_diff1 <- as.numeric(pvij1 >= threshold1[T1])
est_diff1_1 <- as.numeric(pvij1 >= threshold1[75])
est_diff2 <- as.numeric(pvij2 >= threshold2[T2])
est_diff3 <- as.numeric(pvij3 >= threshold3[T3])
est_diff4 <- as.numeric(pvij4 >= threshold4[T4])
# Lung cancer
edge_plot1 <- ggplot() +
geom_polygon(data = ca.poly.df, aes(long, lat, group = group),
color = "black",
fill = "white"
)
for(i in which(est_diff1 == 1)){
edge_plot1 = edge_plot1 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "dodgerblue") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_blank(),
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
plot.title = element_text(size=16, face="bold",hjust = 0.5)) +
ggtitle(paste("Lung (T = ", T1, ")", sep=""))
}
for(i in which(est_diff1_1 == 1)){
edge_plot1 = edge_plot1 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red")
}
# Esophageal cancer
edge_plot2 <- ggplot() +
geom_polygon(data = ca.poly.df, aes(long, lat, group = group),
color = "black",
fill = "white"
)
for(i in which(est_diff2 == 1)){
edge_plot2 = edge_plot2 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_blank(),
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
plot.title = element_text(size=16, face="bold",hjust = 0.5)) +
ggtitle(paste("Esophageal (T = ", T2, ")", sep=""))
}
# Larynx cancer
edge_plot3 <- ggplot() +
geom_polygon(data = ca.poly.df, aes(long, lat, group = group),
color = "black",
fill = "white"
)
for(i in which(est_diff3 == 1)){
edge_plot3 = edge_plot3 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_blank(),
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
plot.title = element_text(size=16, face="bold",hjust = 0.5)) +
ggtitle(paste("Larynx (T = ", T3, ")", sep=""))
}
# Colorectal cancer
edge_plot4 <- ggplot() +
geom_polygon(data = ca.poly.df, aes(long, lat, group = group),
color = "black",
fill = "white"
)
for(i in which(est_diff4 == 1)){
edge_plot4 = edge_plot4 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_blank(),
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
plot.title = element_text(size=16, face="bold",hjust = 0.5)) +
ggtitle(paste("Colorectal (T = ", T4, ")", sep=""))
}
text <- "Figure 5. Difference boundaries (highlighted in red and blue) detected by MCAR in SIR map for four cancers individually.\n The blue boundaries are those not included in top 75 edges for lung cancer. The values in brackets are the number of difference boundaries detected"
tgrob <- text_grob(text,size = 20)
plot_0 <- as_ggplot(tgrob) + theme(plot.margin = margin(0,3,0,27, "cm"))
ggarrange(plot_0, NULL, edge_plot1, edge_plot2, edge_plot3, edge_plot4, nrow = 3, ncol = 2, heights = c(1,5,5))
Y_rep1 <- matrix(0, nrow = n, ncol = 10000)
Y_rep2 <- matrix(0, nrow = n, ncol = 10000)
Y_rep3 <- matrix(0, nrow = n, ncol = 10000)
Y_rep4 <- matrix(0, nrow = n, ncol = 10000)
for(i in 1:10000){
beta1 <- mcmc_samples$beta[i,1]
beta2 <- mcmc_samples$beta[i,2]
beta3 <- mcmc_samples$beta[i,3]
beta4 <- mcmc_samples$beta[i,4]
W1_mcmc = mcmc_samples$phi[i,1:58]
W2_mcmc = mcmc_samples$phi[i,59:116]
W3_mcmc = mcmc_samples$phi[i,117:174]
W4_mcmc = mcmc_samples$phi[i,175:232]
Y_rep1[,i] <- rpois(n, E1 * exp(X1[,1] * beta1 + W1_mcmc)) / E1
Y_rep2[,i] <- rpois(n, E2 * exp(X2[,1] * beta2 + W2_mcmc)) / E2
Y_rep3[,i] <- rpois(n, E3 * exp(X3[,1] * beta3 + W3_mcmc)) / E3
Y_rep4[,i] <- rpois(n, E4 * exp(X4[,1] * beta4 + W4_mcmc)) / E4
}
mu_rep1 = rowMeans(Y_rep1)
mu_rep2 = rowMeans(Y_rep2)
mu_rep3 = rowMeans(Y_rep3)
mu_rep4 = rowMeans(Y_rep4)
var_rep1 = rowVars(Y_rep1)
var_rep2 = rowVars(Y_rep2)
var_rep3 = rowVars(Y_rep3)
var_rep4 = rowVars(Y_rep4)
G.latent1 = sum((Y1/E1 - mu_rep1)^2)
P.latent1 = sum(var_rep1)
D.latent1 = G.latent1 + P.latent1
G.latent2 = sum((Y2/E2 - mu_rep2)^2)
P.latent2 = sum(var_rep2)
D.latent2 = G.latent2 + P.latent2
G.latent3 = sum((Y3/E3 - mu_rep3)^2)
P.latent3 = sum(var_rep3)
D.latent3 = G.latent3 + P.latent3
G.latent4 = sum((Y4/E4 - mu_rep4)^2)
P.latent4 = sum(var_rep4)
D.latent4 = G.latent4 + P.latent4
D_CAR = c(D.latent1, D.latent2, D.latent3, D.latent4)
D_CAR
## [1] 2.314186 26.053475 43.448061 2.057750
sum(D_CAR)
## [1] 73.87347
You can also apply other analysis as MDAGAR here for more details.
sink("MDAGAR_ARDP_ind.txt")
cat("
model
{
for (i in 1:k)
{
log(mu[i]) <- log(E[i]) + X[i,] %*% beta + phi[i]
Y[i] ~ dpois(mu[i])
phi[i] <- inprod(theta, u[i,])
Fr[i] <- pnorm(r[i], 0, 1/invQ1[i,i])
for(h in 2:H){
u[i, h] <- ifelse(Fr[i] > sum_pi[h-1] && Fr[i] < sum_pi[h], 1, 0)
}
u[i,1] <- ifelse(sum(u[i, 2:H])==0, 1, 0)
}
for (i in (k+1):(2*k))
{
log(mu[i]) <- log(E[i]) + X[i,] %*% beta + phi[i]
Y[i] ~ dpois(mu[i])
phi[i] <- inprod(theta, u[i,])
Fr[i] <- pnorm(r[i], 0, 1/invQ2[i-k,i-k])
for(h in 2:H){
u[i, h] <- ifelse(Fr[i] > sum_pi[h-1] && Fr[i] < sum_pi[h], 1, 0)
}
u[i,1] <- ifelse(sum(u[i, 2:H])==0, 1, 0)
}
for (i in (2*k+1):(3*k))
{
log(mu[i]) <- log(E[i]) + X[i,] %*% beta + phi[i]
Y[i] ~ dpois(mu[i])
phi[i] <- inprod(theta, u[i,])
Fr[i] <- pnorm(r[i], 0, 1/invQ3[i-2*k,i-2*k])
for(h in 2:H){
u[i, h] <- ifelse(Fr[i] > sum_pi[h-1] && Fr[i] < sum_pi[h], 1, 0)
}
u[i,1] <- ifelse(sum(u[i, 2:H])==0, 1, 0)
}
for (i in (3*k+1):(4*k))
{
log(mu[i]) <- log(E[i]) + X[i,] %*% beta + phi[i]
Y[i] ~ dpois(mu[i])
phi[i] <- inprod(theta, u[i,])
Fr[i] <- pnorm(r[i], 0, 1/invQ4[i-3*k,i-3*k])
for(h in 2:H){
u[i, h] <- ifelse(Fr[i] > sum_pi[h-1] && Fr[i] < sum_pi[h], 1, 0)
}
u[i,1] <- ifelse(sum(u[i, 2:H])==0, 1, 0)
}
pi[1] <- V[1]
prod[1] <- 1 - pi[1]
for (h in 2:H){
prod[h] <- prod[h-1] * (1-V[h])
pi[h] <- V[h] * prod[h-1]
}
for (h in 1:H){
theta[h] ~ dnorm(0,taus)
V[h] ~ dbeta(1,alpha)
sum_pi[h] <- sum(pi[1:h])
}
for(i in 1:2){
Tau1[i] <- 1
Tau2[i] <- 1
Tau3[i] <- 1
Tau4[i] <- 1
r1[i] ~ dnorm(0, tau1*Tau1[i])
r2[i] ~ dnorm(0, tau2*Tau2[i])
r3[i] ~ dnorm(0, tau3*Tau3[i])
r4[i] ~ dnorm(0, tau4*Tau4[i])
}
for (i in 3:k){
Tau1[i] <- (1 + (ns[i-1] - 1) * rho1^2) / (1 - rho1^2)
Tau2[i] <- (1 + (ns[i-1] - 1) * rho2^2) / (1 - rho2^2)
Tau3[i] <- (1 + (ns[i-1] - 1) * rho3^2) / (1 - rho3^2)
Tau4[i] <- (1 + (ns[i-1] - 1) * rho4^2) / (1 - rho4^2)
b1[i] <- rho1 / (1 + (ns[i-1] - 1) * rho1^2)
b2[i] <- rho2 / (1 + (ns[i-1] - 1) * rho2^2)
b3[i] <- rho3 / (1 + (ns[i-1] - 1) * rho3^2)
b4[i] <- rho4 / (1 + (ns[i-1] - 1) * rho4^2)
for(j in 1:ns[i-1]){
t1[i,j] <- r1[udnei[(cn[i-1] + j)]] * b1[i]
t2[i,j] <- r2[udnei[(cn[i-1] + j)]] * b2[i]
t3[i,j] <- r3[udnei[(cn[i-1] + j)]] * b3[i]
t4[i,j] <- r4[udnei[(cn[i-1] + j)]] * b4[i]
}
for(j in (ns[i-1]+1):mns){
t1[i,j] <- 0
t2[i,j] <- 0
t3[i,j] <- 0
t4[i,j] <- 0
}
r1[i] ~ dnorm(sum(t1[i,]), tau1*Tau1[i])
r2[i] ~ dnorm(sum(t2[i,]), tau2*Tau2[i])
r3[i] ~ dnorm(sum(t3[i,]), tau3*Tau3[i])
r4[i] ~ dnorm(sum(t4[i,]), tau4*Tau4[i])
}
for(i in 1:2){
Taum1[i, i] <- 1
Taum2[i, i] <- 1
Taum3[i, i] <- 1
Taum4[i, i] <- 1
for(j in 1:k){
B1[i,j] <- 0
B2[i,j] <- 0
B3[i,j] <- 0
B4[i,j] <- 0
}
}
for(l in 1:(k-1)){
for(m in (l+1):k){
Taum1[l,m] <- 0
Taum2[l,m] <- 0
Taum3[l,m] <- 0
Taum4[l,m] <- 0
}
}
Taum1[2,1] <- 0
Taum2[2,1] <- 0
Taum3[2,1] <- 0
Taum4[2,1] <- 0
for (i in 3:k) {
Taum1[i,i] <- Tau1[i]
Taum2[i,i] <- Tau2[i]
Taum3[i,i] <- Tau3[i]
Taum4[i,i] <- Tau4[i]
for(m in 1:(i-1)){
Taum1[i,m] <- 0
Taum2[i,m] <- 0
Taum3[i,m] <- 0
Taum4[i,m] <- 0
}
for(j in (udnei[(cn[i-1] + 1):(cn[i-1] + ns[i-1])])){
B1[i,j] <- b1[i]
B2[i,j] <- b2[i]
B3[i,j] <- b3[i]
B4[i,j] <- b4[i]
}
for(j in index1[((k)*(i-3)-cn[i-1]+1) : ((k)*(i-3)-cn[i-1] + (k - ns[i-1]))]){
B1[i,j] <- 0
B2[i,j] <- 0
B3[i,j] <- 0
B4[i,j] <- 0
}
}
Q1 <- tau1 * t(Ik - B1) %*% Taum1 %*% (Ik - B1)
Q2 <- tau2 * t(Ik - B2) %*% Taum2 %*% (Ik - B2)
Q3 <- tau3 * t(Ik - B3) %*% Taum3 %*% (Ik - B3)
Q4 <- tau4 * t(Ik - B4) %*% Taum4 %*% (Ik - B4)
r[1:k] <- r1[1:k]
r[(k+1):(2*k)] <- r2[1:k]
r[(2*k+1):(3*k)] <- r3[1:k]
r[(3*k+1):(4*k)] <- r4[1:k]
invQ1 <- inverse(Q1)
invQ2 <- inverse(Q2)
invQ3 <- inverse(Q3)
invQ4 <- inverse(Q4)
rho1 ~ dunif(0, 0.98)
rho2 ~ dunif(0, 0.98)
rho3 ~ dunif(0, 0.98)
rho4 ~ dunif(0, 0.98)
tau1 ~ dgamma(2, 0.1)
tau2 ~ dgamma(2, 0.1)
tau3 ~ dgamma(2, 0.1)
tau4 ~ dgamma(2, 0.1)
taus ~ dgamma(2,1)
beta[1:ncolumn] ~ dmnorm(rep(0,ncolumn), (0.0001*I));
}
", fill = TRUE)
sink()
model.data1 <- list(k = n, I = diag(ncol(X)), X = X, Y = Y, E = E, Ik = diag(n), index1 = index1,
alpha = 1, ncolumn = ncol(X), H = 15, ns = dni, udnei = udnei, cn = c(0, cni))
model.inits <- rep(list(list(rho1 = 0.1, rho2 = 0.1, rho3 = 0.1, rho4 = 0.1, tau1 = 1, tau2 = 1, tau3 = 1, tau4 = 1,
taus = 1, beta = rep(0, ncol(X)))),2)
model.param <- c("beta", "rho1", "rho2", "rho3", "rho4", "tau1", "tau2", "tau3", "tau4",
"taus", "phi", "u")
set.seed(123)
mcmc_samples <- jags(model.data1, model.inits, model.param, "MDAGAR_ARDP_ind.txt",
n.chains = 2, n.iter = 25000,n.burnin = 20000, n.thin = 1)
# Parameter estimates
sample.mcmc = mcmc_samples$BUGSoutput$sims.matrix[,c(246, 238:245)]
phis <- mcmc_samples$BUGSoutput$sims.matrix[,6:237]
phis1 <- phis[,1:58][,order(final_perm)]
phis2 <- phis[,59:116][,order(final_perm)]
phis3 <- phis[,117:174][,order(final_perm)]
phis4 <- phis[,175:232][,order(final_perm)]
phis_origin <- cbind(phis1, phis2, phis3, phis4)
We calculate posterior probabilities: \(P(\phi_{id} \neq \phi_{jd}), \quad i \sim j\)
vij_samples1 <- t(apply(phis1, 1, function(x){
diff_b1 <- NULL
for(i in 1:nrow(neighbor_list0)){
if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]]){
diff_b1 <- c(diff_b1, 1)
}else{
diff_b1 <- c(diff_b1, 0)
}
}
return(diff_b1)
}))
vij_samples2 <- t(apply(phis2, 1, function(x){
diff_b1 <- NULL
for(i in 1:nrow(neighbor_list0)){
if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]]){
diff_b1 <- c(diff_b1, 1)
}else{
diff_b1 <- c(diff_b1, 0)
}
}
return(diff_b1)
}))
vij_samples3 <- t(apply(phis3, 1, function(x){
diff_b1 <- NULL
for(i in 1:nrow(neighbor_list0)){
if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]]){
diff_b1 <- c(diff_b1, 1)
}else{
diff_b1 <- c(diff_b1, 0)
}
}
return(diff_b1)
}))
vij_samples4 <- t(apply(phis4, 1, function(x){
diff_b1 <- NULL
for(i in 1:nrow(neighbor_list0)){
if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]]){
diff_b1 <- c(diff_b1, 1)
}else{
diff_b1 <- c(diff_b1, 0)
}
}
return(diff_b1)
}))
# probabilities: P(phi_id != phi_jd), i~j
pvij1 <- apply(vij_samples1, 2, mean)
pvij2 <- apply(vij_samples2, 2, mean)
pvij3 <- apply(vij_samples3, 2, mean)
pvij4 <- apply(vij_samples4, 2, mean)
names(pvij1) <- neighbor_name
names(pvij2) <- neighbor_name
names(pvij3) <- neighbor_name
names(pvij4) <- neighbor_name
a. Estimated FDR curves
T_edge1 = seq(0, 135, 1)[-1]
threshold1 = sort(pvij1, decreasing = TRUE)[T_edge1]
threshold2 = sort(pvij2, decreasing = TRUE)[T_edge1]
threshold3 = sort(pvij3, decreasing = TRUE)[T_edge1]
threshold4 = sort(pvij4, decreasing = TRUE)[T_edge1]
FDR_est1 <- rep(0, length(T_edge1))
FDR_est2 <- rep(0, length(T_edge1))
FDR_est3 <- rep(0, length(T_edge1))
FDR_est4 <- rep(0, length(T_edge1))
for(i in 1:length(threshold1)){
th1 <- threshold1[i]
est_diff1 <- as.numeric(pvij1 >= th1)
FDR_est1[i] <- sum((1-pvij1) * est_diff1) / sum(est_diff1)
th2 <- threshold2[i]
est_diff2 <- as.numeric(pvij2 >= th2)
FDR_est2[i] <- sum((1-pvij2) * est_diff2) / sum(est_diff2)
th3 <- threshold3[i]
est_diff3 <- as.numeric(pvij3 >= th3)
FDR_est3[i] <- sum((1-pvij3) * est_diff3) / sum(est_diff3)
th4 <- threshold4[i]
est_diff4 <- as.numeric(pvij4 >= th4)
FDR_est4[i] <- sum((1-pvij4) * est_diff4) / sum(est_diff4)
}
plot(FDR_est1,type="l", lty = 1, xlab="Number of edges selected", ylab = "Estimated FDR", ylim=c(0,0.5))
lines(FDR_est2,type="l", lty = 2)
lines(FDR_est3,type="l", lty = 3)
lines(FDR_est4,type="l", lty = 4)
legend("topleft", legend=c("Lung", "Esophageal", "Layrnx", "Colorectal"),
lty=1:4, cex=0.7)
b. Difference boundary detection
By setting FDR threshold as \(0.1\), we get difference boundaries detected for each cancer individually.
# Set threshold for FDR: 0.025
alpha_n = 0.1
T1 = sum(FDR_est1<=alpha_n)
T2 = sum(FDR_est2<=alpha_n)
T3 = sum(FDR_est3<=alpha_n)
T4 = sum(FDR_est4<=alpha_n)
# Difference boundary for each cancer with FDR <= 0.025
est_diff1 <- as.numeric(pvij1 >= threshold1[T1])
est_diff1_1 <- as.numeric(pvij1 >= threshold1[75])
est_diff2 <- as.numeric(pvij2 >= threshold2[T2])
est_diff3 <- as.numeric(pvij3 >= threshold3[T3])
est_diff4 <- as.numeric(pvij4 >= threshold4[T4])
# Lung cancer
edge_plot1 <- ggplot() +
geom_polygon(data = ca.poly.df, aes(long, lat, group = group),
color = "black",
fill = "white"
)
for(i in which(est_diff1 == 1)){
edge_plot1 = edge_plot1 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "dodgerblue") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_blank(),
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
plot.title = element_text(size=16, face="bold",hjust = 0.5)) +
ggtitle(paste("Lung (T = ", T1, ")", sep=""))
}
for(i in which(est_diff1_1 == 1)){
edge_plot1 = edge_plot1 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red")
}
# Esophageal cancer
edge_plot2 <- ggplot() +
geom_polygon(data = ca.poly.df, aes(long, lat, group = group),
color = "black",
fill = "white"
)
for(i in which(est_diff2 == 1)){
edge_plot2 = edge_plot2 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_blank(),
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
plot.title = element_text(size=16, face="bold",hjust = 0.5)) +
ggtitle(paste("Esophageal (T = ", T2, ")", sep=""))
}
# Larynx cancer
edge_plot3 <- ggplot() +
geom_polygon(data = ca.poly.df, aes(long, lat, group = group),
color = "black",
fill = "white"
)
for(i in which(est_diff3 == 1)){
edge_plot3 = edge_plot3 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_blank(),
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
plot.title = element_text(size=16, face="bold",hjust = 0.5)) +
ggtitle(paste("Larynx (T = ", T3, ")", sep=""))
}
# Colorectal cancer
edge_plot4 <- ggplot() +
geom_polygon(data = ca.poly.df, aes(long, lat, group = group),
color = "black",
fill = "white"
)
for(i in which(est_diff4 == 1)){
edge_plot4 = edge_plot4 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_blank(),
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
plot.title = element_text(size=16, face="bold",hjust = 0.5)) +
ggtitle(paste("Colorectal (T = ", T4, ")", sep=""))
}
text <- "Figure 6. Difference boundaries (highlighted in red and blue) detected by DAGAR_ind in SIR map for four cancers individually.\n The blue boundaries are those not included in top 75 edges for lung cancer. The values in brackets are the number of difference boundaries detected"
tgrob <- text_grob(text,size = 20)
plot_0 <- as_ggplot(tgrob) + theme(plot.margin = margin(0,3,0,27, "cm"))
ggarrange(plot_0, NULL, edge_plot1, edge_plot2, edge_plot3, edge_plot4, nrow = 3, ncol = 2, heights = c(1,5,5))
Y_rep1 <- matrix(0, nrow = n, ncol = 10000)
Y_rep2 <- matrix(0, nrow = n, ncol = 10000)
Y_rep3 <- matrix(0, nrow = n, ncol = 10000)
Y_rep4 <- matrix(0, nrow = n, ncol = 10000)
for(i in 1:10000){
beta1 <- mcmc_samples$beta[i,1]
beta2 <- mcmc_samples$beta[i,2]
beta3 <- mcmc_samples$beta[i,3]
beta4 <- mcmc_samples$beta[i,4]
W1_mcmc = mcmc_samples$phi[i,1:58]
W2_mcmc = mcmc_samples$phi[i,59:116]
W3_mcmc = mcmc_samples$phi[i,117:174]
W4_mcmc = mcmc_samples$phi[i,175:232]
Y_rep1[,i] <- rpois(n, E1 * exp(X1[,1] * beta1 + W1_mcmc)) / E1
Y_rep2[,i] <- rpois(n, E2 * exp(X2[,1] * beta2 + W2_mcmc)) / E2
Y_rep3[,i] <- rpois(n, E3 * exp(X3[,1] * beta3 + W3_mcmc)) / E3
Y_rep4[,i] <- rpois(n, E4 * exp(X4[,1] * beta4 + W4_mcmc)) / E4
}
mu_rep1 = rowMeans(Y_rep1)
mu_rep2 = rowMeans(Y_rep2)
mu_rep3 = rowMeans(Y_rep3)
mu_rep4 = rowMeans(Y_rep4)
var_rep1 = rowVars(Y_rep1)
var_rep2 = rowVars(Y_rep2)
var_rep3 = rowVars(Y_rep3)
var_rep4 = rowVars(Y_rep4)
G.latent1 = sum((Y1/E1 - mu_rep1)^2)
P.latent1 = sum(var_rep1)
D.latent1 = G.latent1 + P.latent1
G.latent2 = sum((Y2/E2 - mu_rep2)^2)
P.latent2 = sum(var_rep2)
D.latent2 = G.latent2 + P.latent2
G.latent3 = sum((Y3/E3 - mu_rep3)^2)
P.latent3 = sum(var_rep3)
D.latent3 = G.latent3 + P.latent3
G.latent4 = sum((Y4/E4 - mu_rep4)^2)
P.latent4 = sum(var_rep4)
D.latent4 = G.latent4 + P.latent4
D_DAGARind = c(D.latent1, D.latent2, D.latent3, D.latent4)
D_DAGARind
sum(D_DAGARind)
sink("MCAR_ARDP_ind.txt")
cat("
model
{
for (i in 1:k)
{
log(mu[i]) <- log(E[i]) + X[i,] %*% beta + phi[i]
Y[i] ~ dpois(mu[i])
phi[i] <- inprod(theta, u[i,])
Fr[i] <- pnorm(r[i], 0, 1/invQ1[i,i])
for(h in 2:H){
u[i, h] <- ifelse(Fr[i] > sum_pi[h-1] && Fr[i] < sum_pi[h], 1, 0)
}
u[i,1] <- ifelse(sum(u[i, 2:H])==0, 1, 0)
}
for (i in (k+1):(2*k))
{
log(mu[i]) <- log(E[i]) + X[i,] %*% beta + phi[i]
Y[i] ~ dpois(mu[i])
phi[i] <- inprod(theta, u[i,])
Fr[i] <- pnorm(r[i], 0, 1/invQ2[i-k,i-k])
for(h in 2:H){
u[i, h] <- ifelse(Fr[i] > sum_pi[h-1] && Fr[i] < sum_pi[h], 1, 0)
}
u[i,1] <- ifelse(sum(u[i, 2:H])==0, 1, 0)
}
for (i in (2*k+1):(3*k))
{
log(mu[i]) <- log(E[i]) + X[i,] %*% beta + phi[i]
Y[i] ~ dpois(mu[i])
phi[i] <- inprod(theta, u[i,])
Fr[i] <- pnorm(r[i], 0, 1/invQ3[i-2*k,i-2*k])
for(h in 2:H){
u[i, h] <- ifelse(Fr[i] > sum_pi[h-1] && Fr[i] < sum_pi[h], 1, 0)
}
u[i,1] <- ifelse(sum(u[i, 2:H])==0, 1, 0)
}
for (i in (3*k+1):(4*k))
{
log(mu[i]) <- log(E[i]) + X[i,] %*% beta + phi[i]
Y[i] ~ dpois(mu[i])
phi[i] <- inprod(theta, u[i,])
Fr[i] <- pnorm(r[i], 0, 1/invQ4[i-3*k,i-3*k])
for(h in 2:H){
u[i, h] <- ifelse(Fr[i] > sum_pi[h-1] && Fr[i] < sum_pi[h], 1, 0)
}
u[i,1] <- ifelse(sum(u[i, 2:H])==0, 1, 0)
}
pi[1] <- V[1]
prod[1] <- 1 - pi[1]
for (h in 2:(H-1)){
prod[h] <- prod[h-1] * (1-V[h])
pi[h] <- V[h] * prod[h-1]
}
pi[H] <- 1-sum(pi[1:(H-1)])
for (h in 1:H){
theta[h] ~ dnorm(0,taus)
V[h] ~ dbeta(1,alpha)
sum_pi[h] <- sum(pi[1:h])
}
Q1 <- tau1 * (D - rho1 * Minc)
Q2 <- tau2 * (D - rho2 * Minc)
Q3 <- tau3 * (D - rho3 * Minc)
Q4 <- tau4 * (D - rho4 * Minc)
r1[1:k] ~ dmnorm(rep(0, k), Q1)
r2[1:k] ~ dmnorm(rep(0, k), Q2)
r3[1:k] ~ dmnorm(rep(0, k), Q3)
r4[1:k] ~ dmnorm(rep(0, k), Q4)
r[1:k] <- r1[1:k]
r[(k+1):(2*k)] <- r2[1:k]
r[(2*k+1):(3*k)] <- r3[1:k]
r[(3*k+1):(4*k)] <- r4[1:k]
invQ1 <- inverse(Q1)
invQ2 <- inverse(Q2)
invQ3 <- inverse(Q3)
invQ4 <- inverse(Q4)
rho1 ~ dunif(0, 0.98)
rho2 ~ dunif(0, 0.98)
rho3 ~ dunif(0, 0.98)
rho4 ~ dunif(0, 0.98)
tau1 ~ dgamma(2, 0.1)
tau2 ~ dgamma(2, 0.1)
tau3 ~ dgamma(2, 0.1)
tau4 ~ dgamma(2, 0.1)
taus ~ dgamma(2,1)
beta[1:ncolumn] ~ dmnorm(rep(0,ncolumn), (0.0001*I));
}
", fill = TRUE)
sink()
model.data1 <- list(k = n, I = diag(ncol(X)), X = X, Y = Y, E = E,
alpha = 1, ncolumn = ncol(X), H = 15, D = D, Minc = Minc)
model.inits <- rep(list(list(rho1 = 0.1, rho2 = 0.1, rho3 = 0.1, rho4 = 0.1, tau1 = 1, tau2 = 1, tau3 = 1, tau4 = 1,
taus = 1, beta = rep(0, ncol(X)))),2)
model.param <- c("beta", "rho1", "rho2", "rho3", "rho4", "tau1", "tau2", "tau3", "tau4",
"taus", "phi", "u")
set.seed(123)
mcmc_samples <- jags(model.data1, model.inits, model.param, "MCAR_ARDP_ind.txt",
n.chains = 2, n.iter = 25000,n.burnin = 20000, n.thin = 1)
# Parameter estimates
sample.mcmc = mcmc_samples$BUGSoutput$sims.matrix[,c(246, 238:245)]
phis <- mcmc_samples$BUGSoutput$sims.matrix[,6:237]
phis1 <- phis[,1:58][,order(final_perm)]
phis2 <- phis[,59:116][,order(final_perm)]
phis3 <- phis[,117:174][,order(final_perm)]
phis4 <- phis[,175:232][,order(final_perm)]
phis_origin <- cbind(phis1, phis2, phis3, phis4)
We calculate posterior probabilities: \(P(\phi_{id} \neq \phi_{jd}), \quad i \sim j\)
vij_samples1 <- t(apply(phis1, 1, function(x){
diff_b1 <- NULL
for(i in 1:nrow(neighbor_list0)){
if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]]){
diff_b1 <- c(diff_b1, 1)
}else{
diff_b1 <- c(diff_b1, 0)
}
}
return(diff_b1)
}))
vij_samples2 <- t(apply(phis2, 1, function(x){
diff_b1 <- NULL
for(i in 1:nrow(neighbor_list0)){
if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]]){
diff_b1 <- c(diff_b1, 1)
}else{
diff_b1 <- c(diff_b1, 0)
}
}
return(diff_b1)
}))
vij_samples3 <- t(apply(phis3, 1, function(x){
diff_b1 <- NULL
for(i in 1:nrow(neighbor_list0)){
if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]]){
diff_b1 <- c(diff_b1, 1)
}else{
diff_b1 <- c(diff_b1, 0)
}
}
return(diff_b1)
}))
vij_samples4 <- t(apply(phis4, 1, function(x){
diff_b1 <- NULL
for(i in 1:nrow(neighbor_list0)){
if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]]){
diff_b1 <- c(diff_b1, 1)
}else{
diff_b1 <- c(diff_b1, 0)
}
}
return(diff_b1)
}))
# probabilities: P(phi_id != phi_jd), i~j
pvij1 <- apply(vij_samples1, 2, mean)
pvij2 <- apply(vij_samples2, 2, mean)
pvij3 <- apply(vij_samples3, 2, mean)
pvij4 <- apply(vij_samples4, 2, mean)
names(pvij1) <- neighbor_name
names(pvij2) <- neighbor_name
names(pvij3) <- neighbor_name
names(pvij4) <- neighbor_name
a. Estimated FDR curves
T_edge1 = seq(0, 135, 1)[-1]
threshold1 = sort(pvij1, decreasing = TRUE)[T_edge1]
threshold2 = sort(pvij2, decreasing = TRUE)[T_edge1]
threshold3 = sort(pvij3, decreasing = TRUE)[T_edge1]
threshold4 = sort(pvij4, decreasing = TRUE)[T_edge1]
FDR_est1 <- rep(0, length(T_edge1))
FDR_est2 <- rep(0, length(T_edge1))
FDR_est3 <- rep(0, length(T_edge1))
FDR_est4 <- rep(0, length(T_edge1))
for(i in 1:length(threshold1)){
th1 <- threshold1[i]
est_diff1 <- as.numeric(pvij1 >= th1)
FDR_est1[i] <- sum((1-pvij1) * est_diff1) / sum(est_diff1)
th2 <- threshold2[i]
est_diff2 <- as.numeric(pvij2 >= th2)
FDR_est2[i] <- sum((1-pvij2) * est_diff2) / sum(est_diff2)
th3 <- threshold3[i]
est_diff3 <- as.numeric(pvij3 >= th3)
FDR_est3[i] <- sum((1-pvij3) * est_diff3) / sum(est_diff3)
th4 <- threshold4[i]
est_diff4 <- as.numeric(pvij4 >= th4)
FDR_est4[i] <- sum((1-pvij4) * est_diff4) / sum(est_diff4)
}
plot(FDR_est1,type="l", lty = 1, xlab="Number of edges selected", ylab = "Estimated FDR", ylim=c(0,0.5))
lines(FDR_est2,type="l", lty = 2)
lines(FDR_est3,type="l", lty = 3)
lines(FDR_est4,type="l", lty = 4)
legend("topleft", legend=c("Lung", "Esophageal", "Layrnx", "Colorectal"),
lty=1:4, cex=0.7)
b. Difference boundary detection
By setting FDR threshold as \(0.1\), we get difference boundaries detected for each cancer individually.
# Set threshold for FDR: 0.025
alpha_n = 0.1
T1 = sum(FDR_est1<=alpha_n)
T2 = sum(FDR_est2<=alpha_n)
T3 = sum(FDR_est3<=alpha_n)
T4 = sum(FDR_est4<=alpha_n)
# Difference boundary for each cancer with FDR <= 0.025
est_diff1 <- as.numeric(pvij1 >= threshold1[T1])
est_diff1_1 <- as.numeric(pvij1 >= threshold1[75])
est_diff2 <- as.numeric(pvij2 >= threshold2[T2])
est_diff3 <- as.numeric(pvij3 >= threshold3[T3])
est_diff4 <- as.numeric(pvij4 >= threshold4[T4])
# Lung cancer
edge_plot1 <- ggplot() +
geom_polygon(data = ca.poly.df, aes(long, lat, group = group),
color = "black",
fill = "white"
)
for(i in which(est_diff1 == 1)){
edge_plot1 = edge_plot1 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "dodgerblue") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_blank(),
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
plot.title = element_text(size=16, face="bold",hjust = 0.5)) +
ggtitle(paste("Lung (T = ", T1, ")", sep=""))
}
for(i in which(est_diff1_1 == 1)){
edge_plot1 = edge_plot1 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red")
}
# Esophageal cancer
edge_plot2 <- ggplot() +
geom_polygon(data = ca.poly.df, aes(long, lat, group = group),
color = "black",
fill = "white"
)
for(i in which(est_diff2 == 1)){
edge_plot2 = edge_plot2 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_blank(),
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
plot.title = element_text(size=16, face="bold",hjust = 0.5)) +
ggtitle(paste("Esophageal (T = ", T2, ")", sep=""))
}
# Larynx cancer
edge_plot3 <- ggplot() +
geom_polygon(data = ca.poly.df, aes(long, lat, group = group),
color = "black",
fill = "white"
)
for(i in which(est_diff3 == 1)){
edge_plot3 = edge_plot3 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_blank(),
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
plot.title = element_text(size=16, face="bold",hjust = 0.5)) +
ggtitle(paste("Larynx (T = ", T3, ")", sep=""))
}
# Colorectal cancer
edge_plot4 <- ggplot() +
geom_polygon(data = ca.poly.df, aes(long, lat, group = group),
color = "black",
fill = "white"
)
for(i in which(est_diff4 == 1)){
edge_plot4 = edge_plot4 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_blank(),
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
plot.title = element_text(size=16, face="bold",hjust = 0.5)) +
ggtitle(paste("Colorectal (T = ", T4, ")", sep=""))
}
text <- "Figure 6. Difference boundaries (highlighted in red and blue) detected by DAGAR_ind in SIR map for four cancers individually.\n The blue boundaries are those not included in top 75 edges for lung cancer. The values in brackets are the number of difference boundaries detected"
tgrob <- text_grob(text,size = 20)
plot_0 <- as_ggplot(tgrob) + theme(plot.margin = margin(0,3,0,27, "cm"))
ggarrange(plot_0, NULL, edge_plot1, edge_plot2, edge_plot3, edge_plot4, nrow = 3, ncol = 2, heights = c(1,5,5))
Y_rep1 <- matrix(0, nrow = n, ncol = 10000)
Y_rep2 <- matrix(0, nrow = n, ncol = 10000)
Y_rep3 <- matrix(0, nrow = n, ncol = 10000)
Y_rep4 <- matrix(0, nrow = n, ncol = 10000)
for(i in 1:10000){
beta1 <- mcmc_samples$beta[i,1]
beta2 <- mcmc_samples$beta[i,2]
beta3 <- mcmc_samples$beta[i,3]
beta4 <- mcmc_samples$beta[i,4]
W1_mcmc = mcmc_samples$phi[i,1:58]
W2_mcmc = mcmc_samples$phi[i,59:116]
W3_mcmc = mcmc_samples$phi[i,117:174]
W4_mcmc = mcmc_samples$phi[i,175:232]
Y_rep1[,i] <- rpois(n, E1 * exp(X1[,1] * beta1 + W1_mcmc)) / E1
Y_rep2[,i] <- rpois(n, E2 * exp(X2[,1] * beta2 + W2_mcmc)) / E2
Y_rep3[,i] <- rpois(n, E3 * exp(X3[,1] * beta3 + W3_mcmc)) / E3
Y_rep4[,i] <- rpois(n, E4 * exp(X4[,1] * beta4 + W4_mcmc)) / E4
}
mu_rep1 = rowMeans(Y_rep1)
mu_rep2 = rowMeans(Y_rep2)
mu_rep3 = rowMeans(Y_rep3)
mu_rep4 = rowMeans(Y_rep4)
var_rep1 = rowVars(Y_rep1)
var_rep2 = rowVars(Y_rep2)
var_rep3 = rowVars(Y_rep3)
var_rep4 = rowVars(Y_rep4)
G.latent1 = sum((Y1/E1 - mu_rep1)^2)
P.latent1 = sum(var_rep1)
D.latent1 = G.latent1 + P.latent1
G.latent2 = sum((Y2/E2 - mu_rep2)^2)
P.latent2 = sum(var_rep2)
D.latent2 = G.latent2 + P.latent2
G.latent3 = sum((Y3/E3 - mu_rep3)^2)
P.latent3 = sum(var_rep3)
D.latent3 = G.latent3 + P.latent3
G.latent4 = sum((Y4/E4 - mu_rep4)^2)
P.latent4 = sum(var_rep4)
D.latent4 = G.latent4 + P.latent4
D_CARind = c(D.latent1, D.latent2, D.latent3, D.latent4)
D_CARind
sum(D_CARind)
Accounting for covariates would change difference boundaries detected for each cancer. Here we include two examples: (1) only smoking rates as covariate; (2) all covariates listed in Section 2.1
# Construct covariates matrix
X = as.matrix(bdiag(bdiag(X1[,1:2], X2[,1:2]), bdiag(X3[,1:2],X4[,1:2])))
ARDP_joint_diff_smoke<-function(y=Y, x1=as.matrix(X1[,1:2]), x2=as.matrix(X2[,1:2]), x3 = as.matrix(X3[,1:2]), x4 = as.matrix(X4[,1:2]),
X = X, E = E, Minc, alpha=1, q = 4, n.atoms=15, runs=10000, burn=1000){
#y: data
#x: covariates
#n.atoms: number of atoms in the mixture dist.
#theta: the theta's (iid from baseline) in the model
#alpha: v~beta(1,alpha)
#u: the index indicator of spatial random effect
#rho: sptatial autocorrelation parameter in DAGAR
#Minc: 0-1 adjacency matrix
#V_r: covariance matrix of joint MDAGAR
#Q: presicion matrix of DAGAR
#r: random effects following DAGAR
#F_r: Marginal CDF of r
#taued: presicion in Baseline of DP for disease d
#taus: precision for theta
nq<-length(y)
n <- nq / q
p<-ncol(X)
y1 <- y[1:n]
y2 <- y[(n+1):(2*n)]
y3 <- y[(2*n+1):(3*n)]
y4 <- y[(3*n+1):(4*n)]
E1 <- E[1:n]
E2 <- E[(n+1):(2*n)]
E3 <- E[(2*n+1):(3*n)]
E4 <- E[(3*n+1):(4*n)]
sigmasq_beta = 10000
keepbeta=matrix(0,runs,p)
keepphi=matrix(0,runs,nq)
keeptheta=matrix(0,runs,n.atoms)
keepA=matrix(0,runs,10)
keepu=matrix(0,runs,nq)
keeprho1=keeprho2=keeprho3=keeprho4=keeptaus = rep(0,runs)
keepv=matrix(0,runs,n.atoms)
keepr=matrix(0,runs,nq)
keepFr=matrix(0,runs,nq)
#initial values
theta=rep(0,n.atoms)
beta1<-rep(0,ncol(x1))
beta2<-rep(0,ncol(x2))
beta3<-rep(0,ncol(x3))
beta4<-rep(0,ncol(x4))
beta = c(beta1, beta2, beta3, beta4)
taus = 1
c=2
d=0.1
d2=1
v<-rep(.1,n.atoms)
probs=makeprobs(v)
#A matrix
rho=c(0.5, 0.5, 0.5, 0.5)
eta = log(rho/(1-rho))
A = matrix(0, q, q)
for(i in 1:q){
for(j in 1:i){
if(j == i){
A[i, j] = exp(rnorm(1))
}else{
A[i, j] = rnorm(1)
}
}
}
Q = Dinv_new(Rho = rho, n, cn, ns, udnei, q)
invQ1 = solve(Q[[1]])
invQ2 = solve(Q[[2]])
invQ3 = solve(Q[[3]])
invQ4 = solve(Q[[4]])
kprod = kronecker(A, diag(n))
invQ = as.matrix(bdiag(bdiag(invQ1, invQ2), bdiag(invQ3, invQ4)))
Vr = as.matrix(forceSymmetric(kprod %*% invQ %*% t(kprod)))
r = rmvnorm(1, rep(0, nq), Vr)
F_r=pnorm(r,0,sqrt(diag(Vr)))
u=makeu(F_r,probs)
phi <- theta[u]
nu = 2
R = 0.1 * diag(q)
acceptr=acceptv=acceptrho=acceptA=0
acceptbeta1 = acceptbeta2=acceptbeta3=acceptbeta4 = 0
accepttheta = 0
count<-afterburn<-0
burn = burn + 1
for(iter in 1:runs){
if(iter == runs){
print(iter)
print(acceptA/(iter-1))
print(acceptr/nq/(iter-1))
print(acceptrho/(iter-1))
print(acceptv/n.atoms/(iter-1))
print(accepttheta/n.atoms/(iter-1))
print(acceptbeta1/(iter-1))
print(acceptbeta2/(iter-1))
print(acceptbeta3/(iter-1))
print(acceptbeta4/(iter-1))
}
######################
### update beta ###
######################
# update beta (intercept only model)
pro_beta1=rmvnorm(1,beta1,0.000003*diag(ncol(x1)))
MHrate1=sum(-E1*exp(pro_beta1%*%t(x1)+theta[u][1:n])+y1*(pro_beta1%*%t(x1)+theta[u][1:n]))-
sum(-E1*exp(beta1%*%t(x1)+theta[u][1:n])+y1*(beta1%*%t(x1)+theta[u][1:n]))
if(runif(1,0,1)<exp(MHrate1)){
beta1<-pro_beta1
acceptbeta1=acceptbeta1+1
}
pro_beta2=rmvnorm(1,beta2,0.00003*diag(ncol(x2)))
MHrate2=sum(-E2*exp(pro_beta2%*%t(x2)+theta[u][(n+1):(2*n)])+y2*(pro_beta2%*%t(x2)+theta[u][(n+1):(2*n)]))-
sum(-E2*exp(beta2%*%t(x2)+theta[u][(n+1):(2*n)])+y2*(beta2%*%t(x2)+theta[u][(n+1):(2*n)]))
if(runif(1,0,1)<exp(MHrate2)){
beta2<-pro_beta2
acceptbeta2=acceptbeta2+1
}
pro_beta3=rmvnorm(1,beta3,0.00005*diag(ncol(x3)))
MHrate3=sum(-E3*exp(pro_beta3%*%t(x3)+theta[u][(2*n+1):(3*n)])+y3*(pro_beta3%*%t(x3)+theta[u][(2*n+1):(3*n)]))-
sum(-E3*exp(beta3%*%t(x3)+theta[u][(2*n+1):(3*n)])+y3*(beta3%*%t(x3)+theta[u][(2*n+1):(3*n)]))
if(runif(1,0,1)<exp(MHrate3)){
beta3<-pro_beta3
acceptbeta3=acceptbeta3+1
}
pro_beta4=rmvnorm(1,beta4,0.000003*diag(ncol(x4)))
MHrate4=sum(-E4*exp(pro_beta4%*%t(x4)+theta[u][(3*n+1):(4*n)])+y4*(pro_beta4%*%t(x4)+theta[u][(3*n+1):(4*n)]))-
sum(-E4*exp(beta4%*%t(x4)+theta[u][(3*n+1):(4*n)])+y4*(beta4%*%t(x4)+theta[u][(3*n+1):(4*n)]))
if(runif(1,0,1)<exp(MHrate4)){
beta4<-pro_beta4
acceptbeta4=acceptbeta4+1
}
beta <- c(beta1, beta2, beta3, beta4)
#########################
### update theta ###
#########################
u1 = u[1:n]
u2 = u[(n+1):(2*n)]
u3 = u[(2*n+1):(3*n)]
u4 = u[(3*n+1):(4*n)]
for (j in 1:n.atoms){
count[j]=sum(u==j)
if (count[j]==0){
theta[j]=rnorm(1,0,sqrt(1/taus))
}else{
pro_theta=rnorm(1,theta[j],0.06)
MHrate<-sum(-(E[u==j])*exp(X[u==j,] %*% beta+pro_theta)+(y[u==j])*(X[u==j,] %*% beta+pro_theta))-taus/2*pro_theta^2-
sum(-(E[u==j])*exp(X[u==j,] %*% beta+theta[j])+(y[u==j])*(X[u==j,] %*% beta+theta[j]))+taus/2*theta[j]^2
if(runif(1,0,1)<exp(MHrate)){
theta[j]<-pro_theta
accepttheta=accepttheta+1
}
}
}
######################
### update r ###
######################
for (k in 1:nq){
pro_r=r;pro_Fr=F_r;pro_u=u
pro_r[k]=rnorm(1,r[k],1.8)
pro_Fr[k]=pnorm(pro_r[k],0,sqrt(Vr[k,k]))
pro_u[k]=makeu(pro_Fr[k],probs)
MH=dmvnorm(pro_r,mean=rep(0, nq),sigma=Vr,log=T)+dpois(y[k],E[k]*exp(as.numeric(X[k,]%*%beta)+theta[pro_u[k]]),log=T)-
dmvnorm(r,mean=rep(0, nq),sigma=Vr,log=T)-dpois(y[k],E[k]*exp(as.numeric(X[k,]%*%beta)+theta[u[k]]),log=T)
if(runif(1,0,1)<exp(MH)){
r[k]=pro_r[k]
F_r[k]=pro_Fr[k]
u[k]=pro_u[k]
acceptr=acceptr+1
}
}
######################
### update v ###
######################
#pro_v=v
for (j in 1:n.atoms){
#0.9
pro_v=v
pro_v[j]<-rnorm(1,v[j],0.04)
while (pro_v[j]<0 || pro_v[j]>1) {pro_v[j]<-rnorm(1,v[j],0.04)}
#if(pro_v[j]>0 & pro_v[j]<1){
pro_probs=makeprobs(pro_v)
pro_u=makeu(F_r,pro_probs)
MH=log(dbeta(pro_v[j],1,alpha))+sum(dpois(y,E*exp(X%*%beta+theta[pro_u]),log=T))-
log(dbeta(v[j],1,alpha))-sum(dpois(y,E*exp(X%*%beta+theta[u]),log=T))
if(runif(1,0,1)<exp(MH)){
v[j]=pro_v[j]
probs=pro_probs
u=pro_u
acceptv=acceptv+1
}
# }
}
######################
### update taus ###
######################
taus=rgamma(1,shape=1/2*n.atoms+c,rate=sum(theta^2)/2+d2)
######################
### update rho ###
######################
pro_eta1 = rnorm(1, eta[1], 0.5)
pro_eta2 = rnorm(1, eta[2], 0.5)
pro_eta3 = rnorm(1, eta[3], 0.5)
pro_eta4 = rnorm(1, eta[4], 0.5)
pro_eta = c(pro_eta1, pro_eta2, pro_eta3, pro_eta4)
pro_rho = exp(pro_eta)/(1+exp(pro_eta))
pro_Q = Dinv_new(Rho = pro_rho, n, cn, ns, udnei, q)
pro_invQ1 = solve(pro_Q[[1]])
pro_invQ2 = solve(pro_Q[[2]])
pro_invQ3 = solve(pro_Q[[3]])
pro_invQ4 = solve(pro_Q[[4]])
kprod = kronecker(A, diag(n))
pro_invQ = as.matrix(bdiag(bdiag(pro_invQ1, pro_invQ2), bdiag(pro_invQ3, pro_invQ4)))
pro_Vr = kprod %*% pro_invQ %*% t(kprod)
MH = dmvnorm(r,mean=rep(0, nq),sigma=as.matrix(forceSymmetric(pro_Vr)),log=T) +
log(pro_rho[1]) + log(1-pro_rho[1]) + log(pro_rho[2]) + log(1-pro_rho[2]) +
log(pro_rho[3]) + log(1-pro_rho[3]) + log(pro_rho[4]) + log(1-pro_rho[4]) -
dmvnorm(r,mean=rep(0, nq),sigma=Vr,log=T) - log(rho[1]) - log(1-rho[1]) - log(rho[2]) - log(1-rho[2]) -
log(rho[3]) - log(1-rho[3]) - log(rho[4]) - log(1-rho[4])
if(runif(1,0,1)<exp(MH)){
eta = pro_eta
rho = pro_rho
Vr = as.matrix(forceSymmetric(pro_Vr))
acceptrho=acceptrho+1
}
######################
### update A ###
######################
#0.06
pro_A = matrix(0, q, q)
for(i in 1:q){
for(j in 1:i){
if(j == i){
pro_A[i, j] = exp(rnorm(1, log(A[i, j]), 0.04))
#pro_A[i, j] = rlnorm(1, log(A[i, j]), 0.1)
}else{
pro_A[i, j] = rnorm(1, A[i, j], 0.03)
}
}
}
Q = Dinv_new(Rho = rho, n, cn, ns, udnei, q=4)
invQ1 = solve(Q[[1]])
invQ2 = solve(Q[[2]])
invQ3 = solve(Q[[3]])
invQ4 = solve(Q[[4]])
Sigma = A %*% t(A)
pro_kprod = kronecker(pro_A, diag(n))
invQ = as.matrix(bdiag(bdiag(invQ1, invQ2), bdiag(invQ3, invQ4)))
pro_Vr = pro_kprod %*% as.matrix(forceSymmetric(invQ)) %*% t(pro_kprod)
pro_Sigma = pro_A %*% t(pro_A)
lpA = -(nu+4)/2*logdet(Sigma) - 1/2*sum(diag(nu*R%*%solve(Sigma))) + log(Jacob_A(A)) + sum(log(diag(A)))
pro_lpA = -(nu+4)/2*logdet(pro_Sigma) - 1/2*sum(diag(nu*R%*%solve(pro_Sigma))) + log(Jacob_A(pro_A)) + sum(log(diag(pro_A)))
MH = dmvnorm(r,mean=rep(0, nq),sigma=as.matrix(forceSymmetric(pro_Vr)),log=T) + pro_lpA -
dmvnorm(r,mean=rep(0, nq),sigma=Vr,log=T) - lpA
if(runif(1,0,1)<exp(MH)){
A = pro_A
Vr = as.matrix(forceSymmetric(pro_Vr))
acceptA=acceptA+1
}
#kprod = kronecker(A, diag(n))
#Vr = as.matrix(forceSymmetric(kprod %*% invQ %*% t(kprod)))
######################
### record samples ###
######################
keeptheta[iter,] = theta
keepphi[iter,] = theta[u]
keepA[iter,] = as.vector(A)[-c(5,9,10,13:15)]
keepbeta[iter,]=beta
keeptaus[iter]=taus
keeprho1[iter]=rho[1]
keeprho2[iter]=rho[2]
keeprho3[iter]=rho[3]
keeprho4[iter]=rho[4]
keepu[iter,]=u
keepv[iter,]=v
keepr[iter,]=r
keepFr[iter,]=F_r
# cat("iteration = ", i, "acceptance rate of r = ", acceptr/(n*i),"acceptance rate of v = ",acceptv/(n.atoms*i), "\n")
}
list(beta=keepbeta[burn:runs,], A = keepA[burn:runs,],phi=keepphi[burn:runs,],theta=keeptheta[burn:runs,],u=keepu[burn:runs,],
v=keepv[burn:runs,],r=keepr[burn:runs,],Fr=keepFr[burn:runs,], taus=keeptaus[burn:runs],
rho1=keeprho1[burn:runs], rho2=keeprho2[burn:runs], rho3=keeprho3[burn:runs], rho4=keeprho4[burn:runs])
}
set.seed(123)
mcmc_samples=ARDP_joint_diff_smoke(y=Y, x1=as.matrix(X1[,1:2]), x2=as.matrix(X2[,1:2]), x3 = as.matrix(X3[,1:2]), x4 = as.matrix(X4[,1:2]),
X = X, E=E, Minc, alpha=1, q = 4, n.atoms=15, runs=30000, burn=20000)
## [1] 30000
## [1] 0.2004067
## [1] 0.177686
## [1] 0.1337045
## [1] 0.1909753
## [1] 0.2943876
## [1] 0.190873
## [1] 0.2066069
## [1] 0.2041735
## [1] 0.2069069
sample.mcmc = cbind(mcmc_samples$taus, mcmc_samples$rho1, mcmc_samples$rho2, mcmc_samples$rho3, mcmc_samples$rho4)
#Summary function
mysummary = function(invector) {
c(mean(invector), quantile(invector, .025), quantile(invector,.975))
}
#Coefficients
cov_effect = round(t(apply(mcmc_samples$beta, 2, mysummary)),3)
beta_est = paste(cov_effect[,1], " (", cov_effect[,2], ", ", cov_effect[,3], ")", sep="")
beta_table = data.frame(t(matrix(beta_est, ncol = 2, byrow = TRUE)))
colnames(beta_table) = c("Lung", "Esophageal", "Larynx", "Colorectal")
rownames(beta_table) = c("Intercept", "Smoking")
kable(beta_table)
| Lung | Esophageal | Larynx | Colorectal | |
|---|---|---|---|---|
| Intercept | -0.066 (-0.109, -0.022) | -0.101 (-0.194, 0.004) | -0.127 (-0.337, 0.015) | 0.063 (0.018, 0.105) |
| Smoking | 0.02 (0.016, 0.025) | 0.019 (0.008, 0.027) | 0.019 (0.008, 0.034) | -0.012 (-0.015, -0.009) |
phis <- mcmc_samples$phi
phis1 <- phis[,1:58][,order(final_perm)]
phis2 <- phis[,59:116][,order(final_perm)]
phis3 <- phis[,117:174][,order(final_perm)]
phis4 <- phis[,175:232][,order(final_perm)]
phis_origin <- cbind(phis1, phis2, phis3, phis4)
vij_samples1 <- t(apply(phis1, 1, function(x){
diff_b1 <- NULL
for(i in 1:nrow(neighbor_list0)){
if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]]){
diff_b1 <- c(diff_b1, 1)
}else{
diff_b1 <- c(diff_b1, 0)
}
}
return(diff_b1)
}))
vij_samples2 <- t(apply(phis2, 1, function(x){
diff_b1 <- NULL
for(i in 1:nrow(neighbor_list0)){
if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]]){
diff_b1 <- c(diff_b1, 1)
}else{
diff_b1 <- c(diff_b1, 0)
}
}
return(diff_b1)
}))
vij_samples3 <- t(apply(phis3, 1, function(x){
diff_b1 <- NULL
for(i in 1:nrow(neighbor_list0)){
if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]]){
diff_b1 <- c(diff_b1, 1)
}else{
diff_b1 <- c(diff_b1, 0)
}
}
return(diff_b1)
}))
vij_samples4 <- t(apply(phis4, 1, function(x){
diff_b1 <- NULL
for(i in 1:nrow(neighbor_list0)){
if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]]){
diff_b1 <- c(diff_b1, 1)
}else{
diff_b1 <- c(diff_b1, 0)
}
}
return(diff_b1)
}))
#probabilities
pvij1 <- apply(vij_samples1, 2, mean)
pvij2 <- apply(vij_samples2, 2, mean)
pvij3 <- apply(vij_samples3, 2, mean)
pvij4 <- apply(vij_samples4, 2, mean)
names(pvij1) <- neighbor_name
names(pvij2) <- neighbor_name
names(pvij3) <- neighbor_name
names(pvij4) <- neighbor_name
T_edge1 = seq(0, 135, 1)[-1]
threshold1 = sort(pvij1, decreasing = TRUE)[T_edge1]
threshold2 = sort(pvij2, decreasing = TRUE)[T_edge1]
threshold3 = sort(pvij3, decreasing = TRUE)[T_edge1]
threshold4 = sort(pvij4, decreasing = TRUE)[T_edge1]
FDR_est1 <- rep(0, length(T_edge1))
FDR_est2 <- rep(0, length(T_edge1))
FDR_est3 <- rep(0, length(T_edge1))
FDR_est4 <- rep(0, length(T_edge1))
for(i in 1:length(threshold1)){
th1 <- threshold1[i]
est_diff1 <- as.numeric(pvij1 >= th1)
FDR_est1[i] <- sum((1-pvij1) * est_diff1) / sum(est_diff1)
th2 <- threshold2[i]
est_diff2 <- as.numeric(pvij2 >= th2)
FDR_est2[i] <- sum((1-pvij2) * est_diff2) / sum(est_diff2)
th3 <- threshold3[i]
est_diff3 <- as.numeric(pvij3 >= th3)
FDR_est3[i] <- sum((1-pvij3) * est_diff3) / sum(est_diff3)
th4 <- threshold4[i]
est_diff4 <- as.numeric(pvij4 >= th4)
FDR_est4[i] <- sum((1-pvij4) * est_diff4) / sum(est_diff4)
}
# Use the same threshold as MDAGAR and identify difference boundary for each cancer individually
alpha_n = 0.025
T1 = sum(FDR_est1<=alpha_n)
T2 = sum(FDR_est2<=alpha_n)
T3 = sum(FDR_est3<=alpha_n)
T4 = sum(FDR_est4<=alpha_n)
est_diff1 <- as.numeric(pvij1 >= threshold1[T1])
est_diff2 <- as.numeric(pvij2 >= threshold2[T2])
est_diff3 <- as.numeric(pvij3 >= threshold3[T3])
est_diff4 <- as.numeric(pvij4 >= threshold4[T4])
edge_plot1 <- ggplot() +
geom_polygon(data = ca.poly.df, aes(long, lat, group = group),
color = "black",
fill = "white"
)
for(i in which(est_diff1 == 1)){
edge_plot1 = edge_plot1 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_blank(),
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
plot.title = element_text(size=16, face="bold",hjust = 0.5)) +
ggtitle(paste("Lung (T = ", T1, ")", sep=""))
}
edge_plot2 <- ggplot() +
geom_polygon(data = ca.poly.df, aes(long, lat, group = group),
color = "black",
fill = "white"
)
for(i in which(est_diff2 == 1)){
edge_plot2 = edge_plot2 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_blank(),
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
plot.title = element_text(size=16, face="bold",hjust = 0.5)) +
ggtitle(paste("Esophageal (T = ", T2, ")", sep=""))
}
edge_plot3 <- ggplot() +
geom_polygon(data = ca.poly.df, aes(long, lat, group = group),
color = "black",
fill = "white"
)
for(i in which(est_diff3 == 1)){
edge_plot3 = edge_plot3 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_blank(),
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
plot.title = element_text(size=16, face="bold",hjust = 0.5)) +
ggtitle(paste("Larynx (T = ", T3, ")", sep=""))
}
edge_plot4 <- ggplot() +
geom_polygon(data = ca.poly.df, aes(long, lat, group = group),
color = "black",
fill = "white"
)
for(i in which(est_diff4 == 1)){
edge_plot4 = edge_plot4 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_blank(),
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
plot.title = element_text(size=16, face="bold",hjust = 0.5)) +
ggtitle(paste("Colorectal (T = ", T4, ")", sep=""))
}
text <- "Figure 8. Difference boundaries (highlighted in red and blue) detected by MDAGAR after accounting for smoking rates.\n The values in brackets are the number of difference boundaries detected"
tgrob <- text_grob(text,size = 22)
plot_0 <- as_ggplot(tgrob) + theme(plot.margin = margin(0,3,0,24, "cm"))
ggarrange(plot_0, NULL, edge_plot1, edge_plot2, edge_plot3, edge_plot4, nrow = 3, ncol = 2, heights = c(1,5,5))
# Construct covariates matrix
X = as.matrix(bdiag(bdiag(X1, X2), bdiag(X3,X4)))
ARDP_joint_diff<-function(y=Y, x1=as.matrix(X1), x2=as.matrix(X2), x3 = as.matrix(X3), x4 = as.matrix(X4),
X = X, E = E, Minc, alpha=1, q = 4, n.atoms=15, runs=10000, burn=1000){
#y: data
#x: covariates
#n.atoms: number of atoms in the mixture dist.
#theta: the theta's (iid from baseline) in the model
#alpha: v~beta(1,alpha)
#u: the index indicator of spatial random effect
#rho: sptatial autocorrelation parameter in DAGAR
#Minc: 0-1 adjacency matrix
#V_r: covariance matrix of joint MDAGAR
#Q: presicion matrix of DAGAR
#r: random effects following DAGAR
#F_r: Marginal CDF of r
#taued: presicion in Baseline of DP for disease d
#taus: precision for theta
nq<-length(y)
n <- nq / q
p<-ncol(X)
y1 <- y[1:n]
y2 <- y[(n+1):(2*n)]
y3 <- y[(2*n+1):(3*n)]
y4 <- y[(3*n+1):(4*n)]
E1 <- E[1:n]
E2 <- E[(n+1):(2*n)]
E3 <- E[(2*n+1):(3*n)]
E4 <- E[(3*n+1):(4*n)]
sigmasq_beta = 10000
keepbeta=matrix(0,runs,p)
keepphi=matrix(0,runs,nq)
keeptheta=matrix(0,runs,n.atoms)
keepA=matrix(0,runs,10)
keepu=matrix(0,runs,nq)
keeprho1=keeprho2=keeprho3=keeprho4=keeptaus = rep(0,runs)
keepv=matrix(0,runs,n.atoms)
keepr=matrix(0,runs,nq)
keepFr=matrix(0,runs,nq)
#initial values
theta=rep(0,n.atoms)
beta1<-rep(0,ncol(x1))
beta2<-rep(0,ncol(x2))
beta3<-rep(0,ncol(x3))
beta4<-rep(0,ncol(x4))
beta = c(beta1, beta2, beta3, beta4)
taus = 1
c=2
d=0.1
d2=1
v<-rep(.1,n.atoms)
probs=makeprobs(v)
#A matrix
rho=c(0.5, 0.5, 0.5, 0.5)
eta = log(rho/(1-rho))
A = matrix(0, q, q)
for(i in 1:q){
for(j in 1:i){
if(j == i){
A[i, j] = exp(rnorm(1))
}else{
A[i, j] = rnorm(1)
}
}
}
Q = Dinv_new(Rho = rho, n, cn, ns, udnei, q)
invQ1 = solve(Q[[1]])
invQ2 = solve(Q[[2]])
invQ3 = solve(Q[[3]])
invQ4 = solve(Q[[4]])
kprod = kronecker(A, diag(n))
invQ = as.matrix(bdiag(bdiag(invQ1, invQ2), bdiag(invQ3, invQ4)))
Vr = as.matrix(forceSymmetric(kprod %*% invQ %*% t(kprod)))
r = rmvnorm(1, rep(0, nq), Vr)
F_r=pnorm(r,0,sqrt(diag(Vr)))
u=makeu(F_r,probs)
phi <- theta[u]
nu = 2
R = 0.1 * diag(q)
acceptr=acceptv=acceptrho=acceptA=0
acceptbeta1 = acceptbeta2=acceptbeta3=acceptbeta4 = 0
accepttheta = 0
count<-afterburn<-0
burn = burn + 1
for(iter in 1:runs){
if(iter == runs){
print(iter)
print(acceptA/(iter-1))
print(acceptr/nq/(iter-1))
print(acceptrho/(iter-1))
print(acceptv/n.atoms/(iter-1))
print(accepttheta/n.atoms/(iter-1))
print(acceptbeta1/(iter-1))
print(acceptbeta2/(iter-1))
print(acceptbeta3/(iter-1))
print(acceptbeta4/(iter-1))
}
######################
### update beta ###
######################
# update beta (intercept only model)
pro_beta1=rmvnorm(1,beta1,0.0000001*diag(ncol(x1)))
MHrate1=sum(-E1*exp(pro_beta1%*%t(x1)+theta[u][1:n])+y1*(pro_beta1%*%t(x1)+theta[u][1:n]))-
sum(-E1*exp(beta1%*%t(x1)+theta[u][1:n])+y1*(beta1%*%t(x1)+theta[u][1:n]))
if(runif(1,0,1)<exp(MHrate1)){
beta1<-pro_beta1
acceptbeta1=acceptbeta1+1
}
pro_beta2=rmvnorm(1,beta2,0.000002*diag(ncol(x2)))
MHrate2=sum(-E2*exp(pro_beta2%*%t(x2)+theta[u][(n+1):(2*n)])+y2*(pro_beta2%*%t(x2)+theta[u][(n+1):(2*n)]))-
sum(-E2*exp(beta2%*%t(x2)+theta[u][(n+1):(2*n)])+y2*(beta2%*%t(x2)+theta[u][(n+1):(2*n)]))
if(runif(1,0,1)<exp(MHrate2)){
beta2<-pro_beta2
acceptbeta2=acceptbeta2+1
}
pro_beta3=rmvnorm(1,beta3,0.000004*diag(ncol(x3)))
MHrate3=sum(-E3*exp(pro_beta3%*%t(x3)+theta[u][(2*n+1):(3*n)])+y3*(pro_beta3%*%t(x3)+theta[u][(2*n+1):(3*n)]))-
sum(-E3*exp(beta3%*%t(x3)+theta[u][(2*n+1):(3*n)])+y3*(beta3%*%t(x3)+theta[u][(2*n+1):(3*n)]))
if(runif(1,0,1)<exp(MHrate3)){
beta3<-pro_beta3
acceptbeta3=acceptbeta3+1
}
pro_beta4=rmvnorm(1,beta4,0.0000002*diag(ncol(x4)))
MHrate4=sum(-E4*exp(pro_beta4%*%t(x4)+theta[u][(3*n+1):(4*n)])+y4*(pro_beta4%*%t(x4)+theta[u][(3*n+1):(4*n)]))-
sum(-E4*exp(beta4%*%t(x4)+theta[u][(3*n+1):(4*n)])+y4*(beta4%*%t(x4)+theta[u][(3*n+1):(4*n)]))
if(runif(1,0,1)<exp(MHrate4)){
beta4<-pro_beta4
acceptbeta4=acceptbeta4+1
}
beta <- c(beta1, beta2, beta3, beta4)
#########################
### update theta ###
#########################
u1 = u[1:n]
u2 = u[(n+1):(2*n)]
u3 = u[(2*n+1):(3*n)]
u4 = u[(3*n+1):(4*n)]
for (j in 1:n.atoms){
count[j]=sum(u==j)
if (count[j]==0){
theta[j]=rnorm(1,0,sqrt(1/taus))
}else{
pro_theta=rnorm(1,theta[j],0.06)
MHrate<-sum(-(E[u==j])*exp(X[u==j,] %*% beta+pro_theta)+(y[u==j])*(X[u==j,] %*% beta+pro_theta))-taus/2*pro_theta^2-
sum(-(E[u==j])*exp(X[u==j,] %*% beta+theta[j])+(y[u==j])*(X[u==j,] %*% beta+theta[j]))+taus/2*theta[j]^2
if(runif(1,0,1)<exp(MHrate)){
theta[j]<-pro_theta
accepttheta=accepttheta+1
}
}
}
######################
### update r ###
######################
for (k in 1:nq){
pro_r=r;pro_Fr=F_r;pro_u=u
pro_r[k]=rnorm(1,r[k],1.8)
pro_Fr[k]=pnorm(pro_r[k],0,sqrt(Vr[k,k]))
pro_u[k]=makeu(pro_Fr[k],probs)
MH=dmvnorm(pro_r,mean=rep(0, nq),sigma=Vr,log=T)+dpois(y[k],E[k]*exp(as.numeric(X[k,]%*%beta)+theta[pro_u[k]]),log=T)-
dmvnorm(r,mean=rep(0, nq),sigma=Vr,log=T)-dpois(y[k],E[k]*exp(as.numeric(X[k,]%*%beta)+theta[u[k]]),log=T)
if(runif(1,0,1)<exp(MH)){
r[k]=pro_r[k]
F_r[k]=pro_Fr[k]
u[k]=pro_u[k]
acceptr=acceptr+1
}
}
######################
### update v ###
######################
#pro_v=v
for (j in 1:n.atoms){
pro_v=v
pro_v[j]<-rnorm(1,v[j],0.05)
while (pro_v[j]<0 || pro_v[j]>1) {pro_v[j]<-rnorm(1,v[j],0.05)}
pro_probs=makeprobs(pro_v)
pro_u=makeu(F_r,pro_probs)
MH=log(dbeta(pro_v[j],1,alpha))+sum(dpois(y,E*exp(X%*%beta+theta[pro_u]),log=T))-
log(dbeta(v[j],1,alpha))-sum(dpois(y,E*exp(X%*%beta+theta[u]),log=T))
if(runif(1,0,1)<exp(MH)){
v[j]=pro_v[j]
probs=pro_probs
u=pro_u
acceptv=acceptv+1
}
# }
}
######################
### update taus ###
######################
taus=rgamma(1,shape=1/2*n.atoms+c,rate=sum(theta^2)/2+d2)
######################
### update rho ###
######################
pro_eta1 = rnorm(1, eta[1], 0.5)
pro_eta2 = rnorm(1, eta[2], 0.5)
pro_eta3 = rnorm(1, eta[3], 0.5)
pro_eta4 = rnorm(1, eta[4], 0.5)
pro_eta = c(pro_eta1, pro_eta2, pro_eta3, pro_eta4)
pro_rho = exp(pro_eta)/(1+exp(pro_eta))
pro_Q = Dinv_new(Rho = pro_rho, n, cn, ns, udnei, q)
pro_invQ1 = solve(pro_Q[[1]])
pro_invQ2 = solve(pro_Q[[2]])
pro_invQ3 = solve(pro_Q[[3]])
pro_invQ4 = solve(pro_Q[[4]])
kprod = kronecker(A, diag(n))
pro_invQ = as.matrix(bdiag(bdiag(pro_invQ1, pro_invQ2), bdiag(pro_invQ3, pro_invQ4)))
pro_Vr = kprod %*% pro_invQ %*% t(kprod)
MH = dmvnorm(r,mean=rep(0, nq),sigma=as.matrix(forceSymmetric(pro_Vr)),log=T) +
log(pro_rho[1]) + log(1-pro_rho[1]) + log(pro_rho[2]) + log(1-pro_rho[2]) +
log(pro_rho[3]) + log(1-pro_rho[3]) + log(pro_rho[4]) + log(1-pro_rho[4]) -
dmvnorm(r,mean=rep(0, nq),sigma=Vr,log=T) - log(rho[1]) - log(1-rho[1]) - log(rho[2]) - log(1-rho[2]) -
log(rho[3]) - log(1-rho[3]) - log(rho[4]) - log(1-rho[4])
if(runif(1,0,1)<exp(MH)){
eta = pro_eta
rho = pro_rho
Vr = as.matrix(forceSymmetric(pro_Vr))
acceptrho=acceptrho+1
}
######################
### update A ###
######################
#0.06
pro_A = matrix(0, q, q)
for(i in 1:q){
for(j in 1:i){
if(j == i){
pro_A[i, j] = exp(rnorm(1, log(A[i, j]), 0.03))
#pro_A[i, j] = rlnorm(1, log(A[i, j]), 0.1)
}else{
pro_A[i, j] = rnorm(1, A[i, j], 0.03)
}
}
}
Q = Dinv_new(Rho = rho, n, cn, ns, udnei, q=4)
invQ1 = solve(Q[[1]])
invQ2 = solve(Q[[2]])
invQ3 = solve(Q[[3]])
invQ4 = solve(Q[[4]])
Sigma = A %*% t(A)
pro_kprod = kronecker(pro_A, diag(n))
invQ = as.matrix(bdiag(bdiag(invQ1, invQ2), bdiag(invQ3, invQ4)))
pro_Vr = pro_kprod %*% as.matrix(forceSymmetric(invQ)) %*% t(pro_kprod)
pro_Sigma = pro_A %*% t(pro_A)
lpA = -(nu+4)/2*logdet(Sigma) - 1/2*sum(diag(nu*R%*%solve(Sigma))) + log(Jacob_A(A)) + sum(log(diag(A)))
pro_lpA = -(nu+4)/2*logdet(pro_Sigma) - 1/2*sum(diag(nu*R%*%solve(pro_Sigma))) + log(Jacob_A(pro_A)) + sum(log(diag(pro_A)))
MH = dmvnorm(r,mean=rep(0, nq),sigma=as.matrix(forceSymmetric(pro_Vr)),log=T) + pro_lpA -
dmvnorm(r,mean=rep(0, nq),sigma=Vr,log=T) - lpA
if(runif(1,0,1)<exp(MH)){
A = pro_A
Vr = as.matrix(forceSymmetric(pro_Vr))
acceptA=acceptA+1
}
######################
### record samples ###
######################
keeptheta[iter,] = theta
keepphi[iter,] = theta[u]
keepA[iter,] = as.vector(A)[-c(5,9,10,13:15)]
keepbeta[iter,]=beta
keeptaus[iter]=taus
keeprho1[iter]=rho[1]
keeprho2[iter]=rho[2]
keeprho3[iter]=rho[3]
keeprho4[iter]=rho[4]
keepu[iter,]=u
keepv[iter,]=v
keepr[iter,]=r
keepFr[iter,]=F_r
# cat("iteration = ", i, "acceptance rate of r = ", acceptr/(n*i),"acceptance rate of v = ",acceptv/(n.atoms*i), "\n")
}
list(beta=keepbeta[burn:runs,], A = keepA[burn:runs,],phi=keepphi[burn:runs,],theta=keeptheta[burn:runs,],u=keepu[burn:runs,],
v=keepv[burn:runs,],r=keepr[burn:runs,],Fr=keepFr[burn:runs,], taus=keeptaus[burn:runs],
rho1=keeprho1[burn:runs], rho2=keeprho2[burn:runs], rho3=keeprho3[burn:runs], rho4=keeprho4[burn:runs])
}
set.seed(123)
mcmc_samples=ARDP_joint_diff(y=Y, x1=as.matrix(X1), x2=as.matrix(X2), x3 = as.matrix(X3), x4 = as.matrix(X4),
X = X, E=E, Minc, alpha=1, q = 4, n.atoms=15, runs=30000, burn=20000)
## [1] 30000
## [1] 0.1171039
## [1] 0.1961909
## [1] 0.1452048
## [1] 0.3426181
## [1] 0.3084636
## [1] 0.2891763
## [1] 0.2085736
## [1] 0.1963399
## [1] 0.2082736
sample.mcmc = cbind(mcmc_samples$taus, mcmc_samples$rho1, mcmc_samples$rho2, mcmc_samples$rho3, mcmc_samples$rho4)
#Coefficients
cov_effect = round(t(apply(mcmc_samples$beta, 2, mysummary)),3)
beta_est = paste(cov_effect[,1], " (", cov_effect[,2], ", ", cov_effect[,3], ")", sep="")
beta_table = data.frame(t(matrix(beta_est, ncol = 8, byrow = TRUE)))
colnames(beta_table) = c("Lung", "Esophageal", "Larynx", "Colorectal")
rownames(beta_table) = c("Intercept", "Smoking", "Young", "Old", "Edu", "Poverty",
"Unemp", "Black" )
kable(beta_table)
| Lung | Esophageal | Larynx | Colorectal | |
|---|---|---|---|---|
| Intercept | 0.021 (0.008, 0.032) | 0.056 (0.028, 0.091) | 0.002 (-0.043, 0.05) | 0.016 (0.002, 0.036) |
| Smoking | 0.015 (0.011, 0.02) | 0.016 (0.003, 0.031) | 0.029 (0.01, 0.053) | -0.002 (-0.007, 0.003) |
| Young | -0.015 (-0.019, -0.012) | -0.009 (-0.019, 0.001) | -0.02 (-0.039, -0.006) | -0.005 (-0.01, -0.002) |
| Old | 0.029 (0.026, 0.032) | 0.027 (0.017, 0.038) | 0.018 (0.004, 0.034) | 0.024 (0.021, 0.028) |
| Edu | -0.012 (-0.016, -0.009) | -0.026 (-0.035, -0.017) | -0.001 (-0.016, 0.012) | -0.006 (-0.009, -0.003) |
| Poverty | -0.008 (-0.012, -0.003) | -0.009 (-0.021, 0.004) | -0.012 (-0.032, 0.01) | -0.001 (-0.006, 0.004) |
| Unemp | 0.042 (0.033, 0.051) | 0.057 (0.036, 0.078) | 0.037 (0.014, 0.062) | 0.014 (0.007, 0.02) |
| Black | -0.01 (-0.012, -0.007) | -0.017 (-0.026, -0.007) | -0.009 (-0.021, 0.004) | 0.005 (0.001, 0.009) |
phis <- mcmc_samples$phi
phis1 <- phis[,1:58][,order(final_perm)]
phis2 <- phis[,59:116][,order(final_perm)]
phis3 <- phis[,117:174][,order(final_perm)]
phis4 <- phis[,175:232][,order(final_perm)]
phis_origin <- cbind(phis1, phis2, phis3, phis4)
vij_samples1 <- t(apply(phis1, 1, function(x){
diff_b1 <- NULL
for(i in 1:nrow(neighbor_list0)){
if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]]){
diff_b1 <- c(diff_b1, 1)
}else{
diff_b1 <- c(diff_b1, 0)
}
}
return(diff_b1)
}))
vij_samples2 <- t(apply(phis2, 1, function(x){
diff_b1 <- NULL
for(i in 1:nrow(neighbor_list0)){
if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]]){
diff_b1 <- c(diff_b1, 1)
}else{
diff_b1 <- c(diff_b1, 0)
}
}
return(diff_b1)
}))
vij_samples3 <- t(apply(phis3, 1, function(x){
diff_b1 <- NULL
for(i in 1:nrow(neighbor_list0)){
if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]]){
diff_b1 <- c(diff_b1, 1)
}else{
diff_b1 <- c(diff_b1, 0)
}
}
return(diff_b1)
}))
vij_samples4 <- t(apply(phis4, 1, function(x){
diff_b1 <- NULL
for(i in 1:nrow(neighbor_list0)){
if(x[neighbor_list0[i,1]] != x[neighbor_list0[i,2]]){
diff_b1 <- c(diff_b1, 1)
}else{
diff_b1 <- c(diff_b1, 0)
}
}
return(diff_b1)
}))
#probabilities
pvij1 <- apply(vij_samples1, 2, mean)
pvij2 <- apply(vij_samples2, 2, mean)
pvij3 <- apply(vij_samples3, 2, mean)
pvij4 <- apply(vij_samples4, 2, mean)
names(pvij1) <- neighbor_name
names(pvij2) <- neighbor_name
names(pvij3) <- neighbor_name
names(pvij4) <- neighbor_name
T_edge1 = seq(0, 135, 1)[-1]
threshold1 = sort(pvij1, decreasing = TRUE)[T_edge1]
threshold2 = sort(pvij2, decreasing = TRUE)[T_edge1]
threshold3 = sort(pvij3, decreasing = TRUE)[T_edge1]
threshold4 = sort(pvij4, decreasing = TRUE)[T_edge1]
FDR_est1 <- rep(0, length(T_edge1))
FDR_est2 <- rep(0, length(T_edge1))
FDR_est3 <- rep(0, length(T_edge1))
FDR_est4 <- rep(0, length(T_edge1))
for(i in 1:length(threshold1)){
th1 <- threshold1[i]
est_diff1 <- as.numeric(pvij1 >= th1)
FDR_est1[i] <- sum((1-pvij1) * est_diff1) / sum(est_diff1)
th2 <- threshold2[i]
est_diff2 <- as.numeric(pvij2 >= th2)
FDR_est2[i] <- sum((1-pvij2) * est_diff2) / sum(est_diff2)
th3 <- threshold3[i]
est_diff3 <- as.numeric(pvij3 >= th3)
FDR_est3[i] <- sum((1-pvij3) * est_diff3) / sum(est_diff3)
th4 <- threshold4[i]
est_diff4 <- as.numeric(pvij4 >= th4)
FDR_est4[i] <- sum((1-pvij4) * est_diff4) / sum(est_diff4)
}
# Use the same threshold as MDAGAR and identify difference boundary for each cancer individually
alpha_n = 0.025
T1 = sum(FDR_est1<=alpha_n)
T2 = sum(FDR_est2<=alpha_n)
T3 = sum(FDR_est3<=alpha_n)
T4 = sum(FDR_est4<=alpha_n)
est_diff1 <- as.numeric(pvij1 >= threshold1[T1])
est_diff2 <- as.numeric(pvij2 >= threshold2[T2])
est_diff3 <- as.numeric(pvij3 >= threshold3[T3])
est_diff4 <- as.numeric(pvij4 >= threshold4[T4])
edge_plot1 <- ggplot() +
geom_polygon(data = ca.poly.df, aes(long, lat, group = group),
color = "black",
fill = "white"
)
for(i in which(est_diff1 == 1)){
edge_plot1 = edge_plot1 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_blank(),
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
plot.title = element_text(size=16, face="bold",hjust = 0.5)) +
ggtitle(paste("Lung (T = ", T1, ")", sep=""))
}
edge_plot2 <- ggplot() +
geom_polygon(data = ca.poly.df, aes(long, lat, group = group),
color = "black",
fill = "white"
)
for(i in which(est_diff2 == 1)){
edge_plot2 = edge_plot2 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_blank(),
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
plot.title = element_text(size=16, face="bold",hjust = 0.5)) +
ggtitle(paste("Esophageal (T = ", T2, ")", sep=""))
}
edge_plot3 <- ggplot() +
geom_polygon(data = ca.poly.df, aes(long, lat, group = group),
color = "black",
fill = "white"
)
for(i in which(est_diff3 == 1)){
edge_plot3 = edge_plot3 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_blank(),
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
plot.title = element_text(size=16, face="bold",hjust = 0.5)) +
ggtitle(paste("Larynx (T = ", T3, ")", sep=""))
}
edge_plot4 <- ggplot() +
geom_polygon(data = ca.poly.df, aes(long, lat, group = group),
color = "black",
fill = "white"
)
for(i in which(est_diff4 == 1)){
edge_plot4 = edge_plot4 + geom_path(aes_string(x = path[[i]][,1], y = path[[i]][,2]), color = "red") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_blank(),
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
plot.title = element_text(size=16, face="bold",hjust = 0.5)) +
ggtitle(paste("Colorectal (T = ", T4, ")", sep=""))
}
text <- "Figure 9. Difference boundaries (highlighted in red and blue) detected by MDAGAR after accounting for all covariate.\n The values in brackets are the number of difference boundaries detected"
tgrob <- text_grob(text,size = 22)
plot_0 <- as_ggplot(tgrob) + theme(plot.margin = margin(0,3,0,24, "cm"))
ggarrange(plot_0, NULL, edge_plot1, edge_plot2, edge_plot3, edge_plot4, nrow = 3, ncol = 2, heights = c(1,5,5))